Added options to bypass cache and follow redirected links

This commit is contained in:
Kirk Mayo 2016-12-15 10:34:59 +13:00
parent b235367b20
commit 0354be75d8
2 changed files with 35 additions and 6 deletions

View File

@ -80,3 +80,24 @@ file in mysite/_config
- 401
- 403
- 501
## Follow 301 redirects ##
You may want to follow a redirected URL. For example, a link that redirects from http to https
can give you a false positive, because the HTTP code 301 will be returned and will be classed
as a working link.
To allow redirects to be followed, set up the following config in your config.yml:
# Follow 301 redirects
CurlLinkChecker:
FollowLocation: 1
## Bypass cache ##
By default the task will attempt to cache any results. The cache can be bypassed with the
following config in config.yml:
# Bypass SS_Cache
CurlLinkChecker:
BypassCache: 1

View File

@ -28,22 +28,30 @@ class CurlLinkChecker implements LinkChecker {
// Skip non-external links
if(!preg_match('/^https?[^:]*:\/\//', $href)) return null;
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Check if we have a cached result
$cacheKey = md5($href);
$result = $this->getCache()->load($cacheKey);
if($result !== false) return $result;
}
// No cached result so just request
$handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
// do we want to follow any redirect locations eg http to https
if (Config::inst()->get('CurlLinkChecker', 'FollowLocation')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle);
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Cache result
$this->getCache()->save($httpCode, $cacheKey);
}
return $httpCode;
}
}