Added options to bypass cache and follow redirected links

This commit is contained in:
Kirk Mayo 2016-12-15 10:34:59 +13:00
parent b235367b20
commit 0354be75d8
2 changed files with 35 additions and 6 deletions

View File

@ -80,3 +80,24 @@ file in mysite/_config
- 401 - 401
- 403 - 403
- 501 - 501
## Follow 301 redirects ##
You may want to follow a redirected URL. For example, a link that redirects from http to https
can give you a false positive, because the HTTP code 301 will be returned, which will be classed
as a working link.
To allow redirects to be followed, set up the following config in your config.yml:
# Follow 301 redirects
CurlLinkChecker:
FollowLocation: 1
## Bypass cache ##
By default the task will attempt to cache any results. The cache can be bypassed with the
following config in config.yml:
# Bypass SS_Cache
CurlLinkChecker:
BypassCache: 1

View File

@ -28,22 +28,30 @@ class CurlLinkChecker implements LinkChecker {
// Skip non-external links // Skip non-external links
if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; if(!preg_match('/^https?[^:]*:\/\//', $href)) return null;
// Check if we have a cached result if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
$cacheKey = md5($href); // Check if we have a cached result
$result = $this->getCache()->load($cacheKey); $cacheKey = md5($href);
if($result !== false) return $result; $result = $this->getCache()->load($cacheKey);
if($result !== false) return $result;
}
// No cached result so just request // No cached result so just request
$handle = curl_init($href); $handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
// do we want to follow any redirect locations eg http to https
if (Config::inst()->get('CurlLinkChecker', 'FollowLocation')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10); curl_setopt($handle, CURLOPT_TIMEOUT, 10);
curl_exec($handle); curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle); curl_close($handle);
// Cache result if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
$this->getCache()->save($httpCode, $cacheKey); // Cache result
$this->getCache()->save($httpCode, $cacheKey);
}
return $httpCode; return $httpCode;
} }
} }