Merge pull request #22 from textagroup/1.1

1.1 added bypass cache and follow location
This commit is contained in:
Robbie Averill 2018-02-05 01:35:27 +13:00 committed by GitHub
commit 7a34198567
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 6 deletions

View File

@ -80,3 +80,24 @@ file in mysite/_config
- 401
- 403
- 501
## Follow 301 redirects ##
You may want to follow redirected URLs. For example, a link that redirects from http to https
can give you a false positive, because the HTTP code 301 is returned, which is classed
as a working link.
To allow redirects to be followed, set up the following config in your config.yml:
# Follow 301 redirects
CurlLinkChecker:
FollowLocation: 1
## Bypass cache ##
By default the task will attempt to cache any results. The cache can be bypassed with the
following config in config.yml:
# Bypass SS_Cache
CurlLinkChecker:
BypassCache: 1

View File

@ -5,6 +5,24 @@
*/
class CurlLinkChecker implements LinkChecker {
/**
* Whether HTTP redirects (for example a 301 from http to https) should be
* followed when a link is checked.
*
* When this config value is truthy, CURLOPT_FOLLOWLOCATION is enabled on the
* cURL handle before the request is executed. Defaults to false.
* Set via YAML file (CurlLinkChecker.FollowLocation).
*
* @config
* @var boolean
*/
private static $FollowLocation = false;
/**
* Whether the result cache should be bypassed when a link is checked.
*
* When this config value is truthy, no cached result is looked up before
* the request is made and the returned HTTP code is not saved to the cache
* afterwards. Defaults to false.
* Set via YAML file (CurlLinkChecker.BypassCache).
*
* @config
* @var boolean
*/
private static $BypassCache = false;
/**
* Return cache
*
@ -28,22 +46,30 @@ class CurlLinkChecker implements LinkChecker {
// Skip non-external links
if(!preg_match('/^https?[^:]*:\/\//', $href)) return null;
// Check if we have a cached result
$cacheKey = md5($href);
$result = $this->getCache()->load($cacheKey);
if($result !== false) return $result;
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Check if we have a cached result
$cacheKey = md5($href);
$result = $this->getCache()->load($cacheKey);
if($result !== false) return $result;
}
// No cached result so just request
$handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
// do we want to follow any redirect locations eg http to https
if (Config::inst()->get('CurlLinkChecker', 'FollowLocation')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle);
// Cache result
$this->getCache()->save($httpCode, $cacheKey);
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Cache result
$this->getCache()->save($httpCode, $cacheKey);
}
return $httpCode;
}
}