From 0354be75d8ee28c6003d01c22e6e56862f07a998 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Thu, 15 Dec 2016 10:34:59 +1300 Subject: [PATCH 1/2] Added options to bypass cache and follow redirected links --- README.md | 21 +++++++++++++++++++++ code/tasks/CurlLinkChecker.php | 20 ++++++++++++++------ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9bae495..bee33bf 100644 --- a/README.md +++ b/README.md @@ -80,3 +80,24 @@ file in mysite/_config - 401 - 403 - 501 + +## Follow 301 redirects ## + +You may want to follow a redirected URL. An example of this would be redirecting from http to https, which +can give you a false positive, as the HTTP code of 301 will be returned, which will be classed +as a working link. + +To allow redirects to be followed, set up the following config in your config.yml + + # Follow 301 redirects + CurlLinkChecker: + FollowLocation: 1 + +## Bypass cache ## + +By default the task will attempt to cache any results. The cache can be bypassed with the +following config in config.yml. 
+ + # Bypass SS_Cache + CurlLinkChecker: + BypassCache: 1 diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php index a84439d..487285b 100644 --- a/code/tasks/CurlLinkChecker.php +++ b/code/tasks/CurlLinkChecker.php @@ -28,22 +28,30 @@ class CurlLinkChecker implements LinkChecker { // Skip non-external links if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; - // Check if we have a cached result - $cacheKey = md5($href); - $result = $this->getCache()->load($cacheKey); - if($result !== false) return $result; + if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) { + // Check if we have a cached result + $cacheKey = md5($href); + $result = $this->getCache()->load($cacheKey); + if($result !== false) return $result; + } // No cached result so just request $handle = curl_init($href); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); + // do we want to follow any redirect locations eg http to https + if (Config::inst()->get('CurlLinkChecker', 'FollowLocation')) { + curl_setopt($handle, CURLOPT_FOLLOWLOCATION, TRUE); + } curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); curl_setopt($handle, CURLOPT_TIMEOUT, 10); curl_exec($handle); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); curl_close($handle); - // Cache result - $this->getCache()->save($httpCode, $cacheKey); + if (!Config::inst()->get('CurlLinkChecker', 'BypassCache')) { + // Cache result + $this->getCache()->save($httpCode, $cacheKey); + } return $httpCode; } } From 9e9946ef647db603c8be0f5bd69e37c35ac1680a Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Sat, 3 Feb 2018 13:55:22 +1300 Subject: [PATCH 2/2] Added config annotations to CurlLinkChecker --- code/tasks/CurlLinkChecker.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php index 487285b..ac05542 100644 --- a/code/tasks/CurlLinkChecker.php +++ b/code/tasks/CurlLinkChecker.php @@ -5,6 +5,24 @@ */ class CurlLinkChecker implements 
LinkChecker { + /** + * If we want to follow redirects a 301 http code for example + * Set via YAML file + * + * @config + * @var boolean + */ + private static $FollowLocation = false; + + /** + * If we want to bypass the cache + * Set via YAML file + * + * @config + * @var boolean + */ + private static $BypassCache = false; + /** * Return cache *