Merge pull request #22 from textagroup/1.1

1.1 added bypass cache and follow location
This commit is contained in:
Robbie Averill 2018-02-05 01:35:27 +13:00 committed by GitHub
commit 7a34198567
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 6 deletions

View File

@ -80,3 +80,24 @@ file in mysite/_config
- 401 - 401
- 403 - 403
- 501 - 501
## Follow 301 redirects ##
You may want to follow redirected URLs. For example, a link that redirects from http to https
can give you a false positive, because the HTTP code 301 is returned and is classed
as a working link.
To allow redirects to be followed, set up the following config in your config.yml:
# Follow 301 redirects
CurlLinkChecker:
FollowLocation: 1
## Bypass cache ##
By default the task will attempt to cache any results. The cache can be bypassed with the
following config in config.yml:
# Bypass SS_Cache
CurlLinkChecker:
BypassCache: 1

View File

@ -5,6 +5,24 @@
*/ */
class CurlLinkChecker implements LinkChecker { class CurlLinkChecker implements LinkChecker {
/**
* Whether the link check should follow HTTP redirects (for example a 301
* from http to https) instead of reporting the redirect's own status code.
* When this evaluates to true, CURLOPT_FOLLOWLOCATION is enabled on the
* curl handle before the request is executed.
*
* Set via YAML file (CurlLinkChecker: FollowLocation).
*
* @config
* @var boolean
*/
private static $FollowLocation = false;
/**
* Whether the link check should bypass the result cache.
* When this evaluates to true, no cached result is looked up before the
* request and the resulting HTTP code is not saved to the cache afterwards.
*
* Set via YAML file (CurlLinkChecker: BypassCache).
*
* @config
* @var boolean
*/
private static $BypassCache = false;
/** /**
* Return cache * Return cache
* *
@ -28,22 +46,30 @@ class CurlLinkChecker implements LinkChecker {
// Skip non-external links // Skip non-external links
if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; if(!preg_match('/^https?[^:]*:\/\//', $href)) return null;
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Check if we have a cached result // Check if we have a cached result
$cacheKey = md5($href); $cacheKey = md5($href);
$result = $this->getCache()->load($cacheKey); $result = $this->getCache()->load($cacheKey);
if($result !== false) return $result; if($result !== false) return $result;
}
// No cached result so just request // No cached result so just request
$handle = curl_init($href); $handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
// do we want to follow any redirect locations eg http to https
if (Config::inst()->get('CurlLinkChecker', 'FollowLocation')) {
curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE);
}
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($handle, CURLOPT_TIMEOUT, 10); curl_setopt($handle, CURLOPT_TIMEOUT, 10);
curl_exec($handle); curl_exec($handle);
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
curl_close($handle); curl_close($handle);
if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) {
// Cache result // Cache result
$this->getCache()->save($httpCode, $cacheKey); $this->getCache()->save($httpCode, $cacheKey);
}
return $httpCode; return $httpCode;
} }
} }