From a4ede246ab6c21e59f83ce8506c72b91a55a0325 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Thu, 7 Aug 2014 13:56:15 +1200 Subject: [PATCH] Tests, refactor, cleanup, fix issues --- _config.php | 1 - _config/injector.yml | 5 + code/controllers/CMSExternalLinks.php | 52 ++--- code/jobs/CheckExternalLinksJob.php | 2 +- code/model/BrokenExternalLink.php | 35 ++- code/model/BrokenExternalPageTrack.php | 28 +++ code/model/BrokenExternalPageTrackStatus.php | 128 +++++++++++ code/reports/BrokenExternalLinksReport.php | 73 +++---- code/tasks/CheckExternalLinks.php | 218 ------------------- code/tasks/CheckExternalLinksTask.php | 204 +++++++++++++++++ code/tasks/CurlLinkChecker.php | 49 +++++ code/tasks/LinkChecker.php | 15 ++ composer.json | 7 +- javascript/BrokenExternalLinksReport.js | 108 ++++----- tests/ExternalLinksTest.php | 142 ++++++++++-- tests/ExternalLinksTest.yml | 63 +++++- 16 files changed, 738 insertions(+), 392 deletions(-) delete mode 100644 _config.php create mode 100644 _config/injector.yml create mode 100644 code/model/BrokenExternalPageTrack.php create mode 100644 code/model/BrokenExternalPageTrackStatus.php delete mode 100644 code/tasks/CheckExternalLinks.php create mode 100644 code/tasks/CheckExternalLinksTask.php create mode 100644 code/tasks/CurlLinkChecker.php create mode 100644 code/tasks/LinkChecker.php diff --git a/_config.php b/_config.php deleted file mode 100644 index b3d9bbc..0000000 --- a/_config.php +++ /dev/null @@ -1 +0,0 @@ -exists()) return null; - echo json_encode(array( + // Set headers + HTTP::set_cache_age(0); + HTTP::add_cache_headers($this->response); + $this->response + ->addHeader('Content-Type', 'application/json') + ->addHeader('Content-Encoding', 'UTF-8') + ->addHeader('X-Content-Type-Options', 'nosniff'); + + // Format status + $track = BrokenExternalPageTrackStatus::get_latest(); + if($track) return json_encode(array( 'TrackID' => $track->ID, 'Status' => $track->Status, - 'Completed' => $track->CompletedPages, - 'Total' => $track->TotalPages + 'Completed' => $track->getCompletedPages(), + 'Total' => $track->getTotalPages() )); } @@ -25,36 +33,22 @@ class CMSExternalLinks_Controller extends Controller { * Starts a broken external link check */ public function start() { - $status = checkExternalLinks::getLatestTrackStatus(); // return if the a job is already running - if ($status == 'Running') { - return; - } + $status = BrokenExternalPageTrackStatus::get_latest(); + if ($status && $status->Status == 'Running') return; + + // Create a new job if (class_exists('QueuedJobService')) { - $pages = Versioned::get_by_stage('SiteTree', 'Stage'); - $noPages = count($pages); - - $track = BrokenExternalPageTrackStatus::create(); - $track->TotalPages = $noPages; - $track->Status = 'Running'; - $track->write(); - - foreach ($pages as $page) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $page->ID; - $trackPage->TrackID = $track->ID; - $trackPage->write(); - } - + // Force the creation of a new run + BrokenExternalPageTrackStatus::create_status(); $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService') - ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + 1)); + singleton('QueuedJobService')->queueJob($checkLinks); } else { //TODO this hangs as it waits for the connection to be released - // should return back and continue processing + // should return back and continue processing // http://us3.php.net/manual/en/features.connection-handling.php - $task = new CheckExternalLinks(); - $task->run(); + $task = CheckExternalLinksTask::create(); + $task->runLinksCheck(); } } } diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index bb79e67..3f4311a 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -24,7 +24,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob implements QueuedJob { * Check an individual page */ public function process() { - $task = new CheckExternalLinks(); + $task = CheckExternalLinksTask::create(); $track = $task->runLinksCheck(1); $this->currentStep = $track->CompletedPages; $this->totalSteps = $track->TotalPages; diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 24ff85f..7d21e07 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -1,5 +1,11 @@ 'Page', - 'Track' => 'BrokenExternalLink' + 'Track' => 'BrokenExternalPageTrack', + 'Status' => 'BrokenExternalPageTrackStatus' ); + /** + * @return SiteTree + */ + public function Page() { + return $this->Track()->Page(); + } + public static $summary_fields = array( 'Page.Title' => 'Page', 'HTTPCode' => 'HTTP Code', @@ -33,22 +46,4 @@ class BrokenExternalLink extends DataObject { } } -class BrokenExternalPageTrackStatus extends DataObject { - private static $db = array( - 'Status' => 'Enum("Completed, Running", "Running")', - 'TotalPages' => 'Int', - 'CompletedPages' => 'Int', - 'JobInfo' => 'Varchar(255)' - ); -} -class BrokenExternalPageTrack extends DataObject { - private static $db = array( - 'TrackID' => 'Int', - 'Processed' => 'Boolean' - ); - - private static $has_one = array( - 'Page' => 'Page' - ); -} diff --git a/code/model/BrokenExternalPageTrack.php b/code/model/BrokenExternalPageTrack.php new file mode 100644 index 0000000..828b541 --- /dev/null +++ b/code/model/BrokenExternalPageTrack.php @@ -0,0 +1,28 @@ + 'Boolean' + ); + + private static $has_one = array( + 'Page' => 'SiteTree', + 'Status' => 'BrokenExternalPageTrackStatus' + ); + + private static $has_many = array( + 'BrokenLinks' => 'BrokenExternalLink' + ); + + /** + * @return SiteTree + */ + public function Page() { + return Versioned::get_by_stage('SiteTree', 'Stage') + ->byID($this->PageID); + } +} diff --git a/code/model/BrokenExternalPageTrackStatus.php b/code/model/BrokenExternalPageTrackStatus.php new file mode 100644 index 0000000..141b996 --- /dev/null +++ b/code/model/BrokenExternalPageTrackStatus.php @@ -0,0 +1,128 @@ + 'Enum("Completed, Running", "Running")', + 'JobInfo' => 'Varchar(255)' + ); + + private static $has_many = array( + 'TrackedPages' => 'BrokenExternalPageTrack', + 'BrokenLinks' => 'BrokenExternalLink' + ); + + /** + * Get the latest track status + * + * @return self + */ + public static function get_latest() { + return self::get() + ->sort('ID', 'DESC') + ->first(); + } + + /** + * Gets the list of Pages yet to be checked + * + * @return DataList + */ + public function getIncompletePageList() { + $pageIDs = $this + ->getIncompleteTracks() + ->column('PageID'); + if($pageIDs) return Versioned::get_by_stage('SiteTree', 'Stage') + ->byIDs($pageIDs); + } + + /** + * Get the list of incomplete BrokenExternalPageTrack + * + * @return DataList + */ + public function getIncompleteTracks() { + return $this + ->TrackedPages() + ->filter('Processed', 0); + } + + /** + * Get total pages count + */ + public function getTotalPages() { + return $this->TrackedPages()->count(); + } + + /** + * Get completed pages count + */ + public function getCompletedPages() { + return $this + ->TrackedPages() + ->filter('Processed', 1) + ->count(); + } + + /** + * Returns the latest run, or otherwise creates a new one + * + * @return self + */ + public static function get_or_create() { + // Check the current status + $status = self::get_latest(); + if ($status && $status->Status == 'Running') { + $status->updateStatus(); + return $status; + } + + return self::create_status(); + } + + /* + * Create and prepare a new status + * + * @return self + */ + public static function create_status() { + // If the script is to be started create a new status + $status = self::create(); + $status->updateJobInfo('Creating new tracking object'); + + // Setup all pages to test + $pageIDs = Versioned::get_by_stage('SiteTree', 'Stage') + ->column('ID'); + foreach ($pageIDs as $pageID) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $pageID; + $trackPage->StatusID = $status->ID; + $trackPage->write(); + } + + return $status; + } + + public function updateJobInfo($message) { + $this->JobInfo = $message; + $this->write(); + } + + /** + * Self check status + */ + public function updateStatus() { + if ($this->CompletedPages == $this->TotalPages) { + $this->Status = 'Completed'; + $this->updateJobInfo('Setting to completed'); + } + } +} \ No newline at end of file diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index c908820..5809cc6 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -8,43 +8,41 @@ class BrokenExternalLinksReport extends SS_Report { - /** - * Columns in the report - * - * @var array - * @config - */ - private static $columns = array( - 'Created' => 'Checked', - 'Link' => 'External Link', - 'HTTPCode' => 'HTTP Error Code', - 'PageLink' => array( - 'title' => 'Page link is on', - 'link' => true - ), - ); - - public function init() { - parent::init(); - } - /** * Returns the report title - * + * * @return string */ public function title() { - return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', - "External broken links report"); + return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', "External broken links report"); } - /** - * Returns the column names of the report - * - * @return array - */ public function columns() { - return self::$columns; + return array( + "Created" => "Checked", + 'Link' => array( + 'title' => 'External Link', + 'formatting' => function($value, $item) { + return sprintf( + '%s', + Convert::raw2att($item->Link), + Convert::raw2xml($item->Link) + ); + } + ), + 'HTTPCode' => 'HTTP Error Code', + "Title" => array( + "title" => 'Page link is on', + 'formatting' => function($value, $item) { + $page = $item->Page(); + return sprintf( + '%s', + Convert::raw2att($page->CMSEditLink()), + Convert::raw2xml($page->Title) + ); + } + ) + ); } /** @@ -57,20 +55,9 @@ class BrokenExternalLinksReport extends SS_Report { } public function sourceRecords() { - $track = CheckExternalLinks::getLatestTrack(); - $returnSet = new ArrayList(); - if ($track && $track->exists()) { - $links = BrokenExternalLink::get() - ->filter('TrackID', $track->ID); - } else { - $links = BrokenExternalLink::get(); - } - foreach ($links as $link) { - $link->PageLink = $link->Page()->Title; - $link->ID = $link->Page()->ID; - $returnSet->push($link); - } - return $returnSet; + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) return $track->BrokenLinks(); + return new ArrayList(); } public function getCMSFields() { diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php deleted file mode 100644 index 4fa8dce..0000000 --- a/code/tasks/CheckExternalLinks.php +++ /dev/null @@ -1,218 +0,0 @@ -runLinksCheck($this->limit); - } - - /** - * Runs the links checker and returns the track used - * - * @param int $limit Limit to number of pages to run - * @return BrokenExternalPageTrackStatus - */ - public function runLinksCheck($limit) { - $track = CheckExternalLinks::getLatestTrack(); - - // if the script has already been started - if ($track && $track->Status == 'Running') { - $batch = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $track->ID, - 'Processed' => 0 - ))->limit($limit)->column('PageID'); - $pages = Versioned::get_by_stage('SiteTree', 'Stage') - ->filter('ID', $batch) - ->limit($limit); - $this->updateJobInfo('Fetching pages to check'); - if ($track->CompletedPages == $track->TotalPages) { - $track->Status = 'Completed'; - $track->write(); - $this->updateJobInfo('Setting to completed'); - } - // if the script is to be started - } else { - $pages = Versioned::get_by_stage('SiteTree', 'Stage')->column('ID'); - $noPages = count($pages); - - $track = BrokenExternalPageTrackStatus::create(); - $track->TotalPages = $noPages; - $track->write(); - $this->updateJobInfo('Creating new tracking object'); - - foreach ($pages as $page) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $page; - $trackPage->TrackID = $track->ID; - $trackPage->write(); - } - - $batch = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $track->ID - ))->limit($limit)->column('PageID'); - - $pages = Versioned::get_by_stage('SiteTree', 'Stage') - ->filter('ID', $batch); - } - $trackID = $track->ID; - foreach ($pages as $page) { - ++$this->totalPages; - - if ($track->ID) { - $trackPage = BrokenExternalPageTrack::get() - ->filter(array( - 'PageID' => $page->ID, - 'TrackID' => $track->ID - ))->first(); - $trackPage->Processed = 1; - $trackPage->write(); - } - - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); - if (!$htmlValue->isValid()) { - continue; - } - - // Populate link tracking for internal links & links to asset files. - if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { - $class = $link->getAttribute('class'); - $pos = stripos($class, 'ss-broken'); - if ($pos !== false && $page->HasBrokenLink == 1) continue; - - $href = Director::makeRelative($link->getAttribute('href')); - if ($href == 'admin/') continue; - - // ignore SiteTree, anchor and assets links as they will be caught - // by SiteTreeLinkTracking - if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) { - continue; - } else if (isset($href[0]) && $href[0] == '#') { - continue; - } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { - continue; - } - - if($href && function_exists('curl_init')) { - $handle = curl_init($href); - curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($handle, CURLOPT_TIMEOUT, 10); - $response = curl_exec($handle); - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); - curl_close($handle); - // do we have any whitelisted codes - $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); - // if the code is whitelisted set it to 200 - $httpCode = (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) ? - 200 : $httpCode; - - // ignore empty hrefs and internal links - if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { - $brokenLink = new BrokenExternalLink(); - $brokenLink->PageID = $page->ID; - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->TrackID = $track->ID; - $brokenLink->write(); - - // set the broken link class - $class = ($class && stripos($class, 'ss-broken')) ? - $class . ' ss-broken' : 'ss-broken'; - $link->setAttribute('class', ($class ? $class : 'ss-broken')); - $htmlValue->__call('saveHTML', array()); - - $page->Content = $htmlValue->getContent(); - $page->owner->write(); - - if (!$page->HasBrokenLink) { - - // bypass the ORM as syncLinkTracking does not allow you - // to update HasBrokenLink to true - $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; - $query .= "WHERE \"ID\" = " . (int)$page->ID; - $result = DB::query($query); - if (!$result) { - $this->debugMessage('Error updating HasBrokenLink'); - } - } - - } - } - } - ++$this->completedPages; - } - - // run this outside the foreach loop to stop it locking DB rows - $this->updateJobInfo('Updating completed pages'); - $this->updateCompletedPages($trackID); - - // do we need to carry on running the job - $track = $this->getLatestTrack(); - if ($track->CompletedPages >= $track->TotalPages) { - $track->Status = 'Completed'; - $track->write(); - - // clear any old previous data - $rows = BrokenExternalPageTrack::get() - ->exclude('TrackID', $track->ID); - foreach ($rows as $row) { - $row->delete(); - } - return $track; - } - - // if running via the queued job module return to the queued job after each iteration - if ($limit == 1) { - return $track; - } else { - $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); - return $this->runLinksCheck($limit); - } - } - - public static function getLatestTrack() { - $track = BrokenExternalPageTrackStatus::get()->sort('ID', 'DESC')->first(); - if (!$track || !$track->exists()) return null; - return $track; - } - - public static function getLatestTrackID() { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - return $track->ID; - } - - public static function getLatestTrackStatus() { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - return $track->Status; - } - - private function updateCompletedPages($trackID = 0) { - $noPages = BrokenExternalPageTrack::get() - ->filter(array('TrackID' => $trackID, 'Processed' => 1))->count(); - $track = $this->getLatestTrack($trackID); - $track->CompletedPages = $noPages; - $track->write(); - return $noPages; - } - - private function updateJobInfo($message) { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - $track->JobInfo = $message; - $track->write(); - } -} diff --git a/code/tasks/CheckExternalLinksTask.php b/code/tasks/CheckExternalLinksTask.php new file mode 100644 index 0000000..97e61c6 --- /dev/null +++ b/code/tasks/CheckExternalLinksTask.php @@ -0,0 +1,204 @@ +silent) Debug::message($message); + } + + /** + * @var bool + */ + protected $silent = false; + + /** + * Turn on or off message output + * + * @param bool $silent + */ + public function setSilent($silent) { + $this->silent = $silent; + } + + protected $title = 'Checking broken External links in the SiteTree'; + + protected $description = 'A task that records external broken links in the SiteTree'; + + protected $enabled = true; + + private static $dependencies = array( + 'LinkChecker' => '%$LinkChecker' + ); + + public function run($request) { + $this->runLinksCheck(); + } + + /** + * @var LinkChecker + */ + protected $linkChecker; + + /** + * @param LinkChecker $linkChecker + */ + public function setLinkChecker(LinkChecker $linkChecker) { + $this->linkChecker = $linkChecker; + } + + /** + * @return LinkChecker + */ + public function getLinkChecker() { + return $this->linkChecker; + } + + + /** + * Check the status of a single link on a page + * + * @param BrokenExternalPageTrack $pageTrack + * @param DOMNode $link + */ + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) { + $class = $link->getAttribute('class'); + $href = $link->getAttribute('href'); + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); + + // Check link + $httpCode = $this->linkChecker->checkLink($href); + if($httpCode === null) return; // Null link means uncheckable, such as an internal link + + // If this code is broken then mark as such + if($foundBroken = $this->isCodeBroken($httpCode)) { + // Create broken record + $brokenLink = new BrokenExternalLink(); + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->TrackID = $pageTrack->ID; + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons + $brokenLink->write(); + } + + // Check if we need to update CSS class, otherwise return + if($markedBroken == $foundBroken) return; + if($foundBroken) { + $class .= ' ss-broken'; + } else { + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); + } + $link->setAttribute('class', trim($class)); + } + + /** + * Determine if the given HTTP code is "broken" + * + * @param int $httpCode + * @return bool True if this is a broken code + */ + protected function isCodeBroken($httpCode) { + // Null represents no request attempted + if($httpCode === null) return false; + + // do we have any whitelisted codes + $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); + if(is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) return false; + + // Check if code is outside valid range + return $httpCode < 200 || $httpCode > 302; + } + + /** + * Runs the links checker and returns the track used + * + * @param int $limit Limit to number of pages to run, or null to run all + * @return BrokenExternalPageTrackStatus + */ + public function runLinksCheck($limit = null) { + // Check the current status + $status = BrokenExternalPageTrackStatus::get_or_create(); + + // Calculate pages to run + $pageTracks = $status->getIncompleteTracks(); + if($limit) $pageTracks = $pageTracks->limit($limit); + + // Check each page + foreach ($pageTracks as $pageTrack) { + // Flag as complete + $pageTrack->Processed = 1; + $pageTrack->write(); + + // Check value of html area + $page = $pageTrack->Page(); + $this->log("Checking {$page->Title}"); + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) continue; + + // Check each link + $links = $htmlValue->getElementsByTagName('a'); + foreach($links as $link) { + $this->checkPageLink($pageTrack, $link); + } + + // Update content of page based on link fixes / breakages + $htmlValue->saveHTML(); + $page->Content = $htmlValue->getContent(); + $page->write(); + + // Once all links have been created for this page update HasBrokenLinks + $count = $pageTrack->BrokenLinks()->count(); + $this->log("Found {$count} broken links"); + if($count) { + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true + DB::query(sprintf( + 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', + intval($pageTrack->ID) + )); + } + } + + $status->updateJobInfo('Updating completed pages'); + $status->updateStatus(); + return $status; + } + + public static function getLatestTrack() { + return BrokenExternalPageTrackStatus::get_latest(); + } + + public static function getLatestTrackID() { + $track = BrokenExternalPageTrackStatus::get_latest(); + return $track ? $track->ID : null; + } + + public static function getLatestTrackStatus() { + $track = BrokenExternalPageTrackStatus::get_latest(); + return $track ? $track->Status : null; + } + + private function updateCompletedPages($trackID = 0) { + $noPages = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $trackID, + 'Processed' => 1 + )) + ->count(); + $track = $this->getLatestTrack($trackID); + $track->CompletedPages = $noPages; + $track->write(); + return $noPages; + } + + private function updateJobInfo($message) { + $track = BrokenExternalPageTrackStatus::get_latest(); + if($track) { + $track->JobInfo = $message; + $track->write(); + } + } +} diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php new file mode 100644 index 0000000..4e307ff --- /dev/null +++ b/code/tasks/CurlLinkChecker.php @@ -0,0 +1,49 @@ + true) + ); + } + + /** + * Determine the http status code for a given link + * + * @param string $href URL to check + * @return int HTTP status code, or null if not checkable (not a link) + */ + protected function checkLink($href) { + // Skip non-external links + if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; + + // Check if we have a cached result + $cacheKey = md5($href); + $result = $this->getCache()->load($cacheKey); + if($result !== false) return $result; + + // No cached result so just request + $handle = curl_init($href); + curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($handle, CURLOPT_TIMEOUT, 10); + curl_exec($handle); + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); + curl_close($handle); + + // Cache result + $this->getCache()->save($httpCode, $cacheKey); + return $httpCode; + } +} diff --git a/code/tasks/LinkChecker.php b/code/tasks/LinkChecker.php new file mode 100644 index 0000000..629f177 --- /dev/null +++ b/code/tasks/LinkChecker.php @@ -0,0 +1,15 @@ +=3.0", "silverstripe/cms": ">=3.0" }, + "require-dev": { + "hafriedlander/silverstripe-phockito": "*", + "phpunit/PHPUnit": "~3.7@stable" + }, "suggest": { "silverstripe/queuedjobs": "Speeds up running the job for Content Editors fropm the report" } diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index ff9acfb..5b3c853 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -1,57 +1,63 @@ (function($) { - $('#externalLinksReport').entwine({ - onclick: function() { - $(this).start(); - }, - onmatch: function() { - // poll the current job and update the front end status - $('#externalLinksReport').hide(); - $(this).poll(0); - }, - start: function() { - // initiate a new job - $('#ReportHolder').empty(); - $('#ReportHolder').text('Running report 0%'); - $('#ReportHolder').append(''); - $('#externalLinksReport').hide(); - $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); - $(this).poll(1); - }, - poll: function(start) { - $.ajax({ - url: "admin/externallinks/getJobStatus", - async: true, - success: function(data) { - var obj = $.parseJSON(data); - - // No report, so let user create one - if (!obj) { - $('#externalLinksReport').show(); - return; - } - var completed = obj.Completed ? obj.Completed : 0; - var total = obj.Total ? obj.Total : 0; - var jobStatus = obj.Status ? obj.Status : 'Running'; - if (jobStatus == 'Completed' && start == 0) { - $('#ReportHolder').text('Report Finished ' + completed + '/' + total); - $('#externalLinksReport').show(); - } else { - setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); - } - if (total && completed) { + $.entwine('ss', function($) { + $('#externalLinksReport').entwine({ + onclick: function() { + $(this).start(); + }, + onmatch: function() { + // poll the current job and update the front end status + $('#externalLinksReport').hide(); + $(this).poll(0); + }, + start: function() { + // initiate a new job + $('#ReportHolder').empty(); + $('#ReportHolder').text('Running report 0%'); + $('#ReportHolder').append(''); + $('#externalLinksReport').hide(); + $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); + $(this).poll(1); + }, + poll: function(start) { + $.ajax({ + url: "admin/externallinks/getJobStatus", + async: true, + success: function(data) { + // No report, so let user create one + if (!data) { + $('#externalLinksReport').show(); + return; + } + + // Parse data + var completed = data.Completed ? data.Completed : 0; + var total = data.Total ? data.Total : 0; + + // If complete status + if (data.Status === 'Completed') { + $('#ReportHolder').text('Report Finished ' + completed + '/' + total); + $('#externalLinksReport').show(); + return; + } + + // If incomplete update status if (completed < total) { var percent = (completed / total) * 100; - $('#ReportHolder').text('Running report ' + completed + '/' + - total + ' (' + percent.toFixed(2) + '%)'); - $('#ReportHolder'). - append(''); - } + $('#ReportHolder') + .text('Running report ' + completed + '/' + total + ' (' + percent.toFixed(2) + '%)') + .append(''); + } + + // Ensure the regular poll method is run + if(!start) { + setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); + } + }, + error: function(e) { + if(typeof console !== 'undefined') console.log(e); } - }, - error: function(e) { - if(typeof console !== 'undefined') console.log(e); - } - }); - } + }); + } + }); }); }(jQuery)); diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index 6c7bf61..2a4f640 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -1,32 +1,129 @@ skipTest = true; + return $this->markTestSkipped("These tests need the Phockito module installed to run"); + } + + // Mock link checker + $checker = Phockito::mock('LinkChecker'); + Phockito::when($checker) + ->checkLink('http://www.working.com') + ->return(200); + + Phockito::when($checker) + ->checkLink('http://www.broken.com/url/thing') // 404 on working site + ->return(404); + + Phockito::when($checker) + ->checkLink('http://www.broken.com') // 403 on working site + ->return(403); + + Phockito::when($checker) + ->checkLink('http://www.nodomain.com') // no ping + ->return(0); + + Phockito::when($checker) + ->checkLink('/internal/link') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=9999]') + ->return(null); + + Phockito::when($checker) + ->checkLink('home') + ->return(null); + + Phockito::when($checker) + ->checkLink('broken-internal') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=1]') + ->return(null); + + Phockito::when($checker) + ->checkLink(anything()) // anything else is 404 + ->return(404); + + Injector::inst()->registerService($checker, 'LinkChecker'); + } + + public function tearDown() { + Injector::unnest(); + parent::tearDown(); + } + public function testLinks() { - // uses http://127.0.0.1 to test a working link - $working = $this->objFromFixture('SiteTree', 'working'); - $working->write(); - $task = new CheckExternalLinks(); - $task->run(null); - $brokenLinks = BrokenExternalLink::get()->column('Link');; - // confirm the working link has not been added as a broken link - $this->assertNotEquals($working->Link, $brokenLinks[0]); - } - - public function testBrokenLink() { - // uses http://192.0.2.1 for a broken link - $broken = $this->objFromFixture('SiteTree', 'broken'); - $broken->write(); - $task = new CheckExternalLinks(); - $task->run(null); - $brokenLinks = BrokenExternalLink::get(); - $this->assertEquals(1, $brokenLinks->count()); + // Run link checker + $task = CheckExternalLinksTask::create(); + $task->setSilent(true); // Be quiet during the test! + $task->runLinksCheck(); + + // Get all links checked + $status = BrokenExternalPageTrackStatus::get_latest(); + $this->assertEquals('Completed', $status->Status); + $this->assertEquals(5, $status->TotalPages); + $this->assertEquals(5, $status->CompletedPages); + + // Check all pages have had the correct HTML adjusted + for($i = 1; $i <= 5; $i++) { + $page = $this->objFromFixture('ExternalLinksTest_Page', 'page'.$i); + $this->assertNotEmpty($page->Content); + $this->assertEquals( + $page->ExpectedContent, + $page->Content, + "Assert that the content of page{$i} has been updated" + ); + } + + // Check that the correct report of broken links is generated + $links = $status + ->BrokenLinks() + ->sort('Link'); + + $this->assertEquals(4, $links->count()); + $this->assertEquals( + array( + 'http://www.broken.com', + 'http://www.broken.com/url/thing', + 'http://www.broken.com/url/thing', + 'http://www.nodomain.com' + ), + array_values($links->map('ID', 'Link')->toArray()) + ); + + // Check response codes are correct + $expected = array( + 'http://www.broken.com' => 403, + 'http://www.broken.com/url/thing' => 404, + 'http://www.nodomain.com' => 0 + ); + $actual = $links->map('Link', 'HTTPCode')->toArray(); + $this->assertEquals($expected, $actual); } + /** + * Test that broken links appears in the reports list + */ public function testReportExists() { - $mock = $this->objFromFixture('SiteTree', 'broken'); $reports = SS_Report::get_reports(); $reportNames = array(); foreach($reports as $report) { @@ -37,3 +134,8 @@ class ExternalLinks extends FunctionalTest { } } +class ExternalLinksTest_Page extends Page implements TestOnly { + private static $db = array( + 'ExpectedContent' => 'HTMLText' + ); +} diff --git a/tests/ExternalLinksTest.yml b/tests/ExternalLinksTest.yml index 46eb0a8..9e92c62 100644 --- a/tests/ExternalLinksTest.yml +++ b/tests/ExternalLinksTest.yml @@ -1,7 +1,56 @@ -SiteTree: - working: - Title: Working Link - Content: 'Localhost' - broken: - Title: Broken Link - Content: 'Broken' \ No newline at end of file +ExternalLinksTest_Page: + # Tests mix of broken and working external links + page1: + Title: 'Page 1' + Content: > +

Links

+ This is a working site +

Other Links

+ but this isn't + ExpectedContent: > +

Links

+ This is a working site +

Other Links

+ but this isn't + # Tests broken external link staying broken + page2: + Title: 'Page 2' + Content: > +

Still Broken

+ ExpectedContent: > +

Still Broken

+ # Tests internal broken links not marking a page as broken + page3: + Title: 'Page 3' + Content: > +

Links

+ Home page + Broken internal page + This is a working site + ExpectedContent: > +

Links

+ Home page + Broken internal page + This is a working site + # Tests httpcode = 0 + page4: + Title: 'Page 4' + Content: > + This shouldn't even have a HTTP response + Another Link +

Copied from another page

+ ExpectedContent: > + This shouldn't even have a HTTP response + Another Link +

Copied from another page

+ # Test page with no broken links + page5: + Title: 'Page 5' + Content: > + Internal Link + Another Link + This is a working site + ExpectedContent: > + Internal Link + Another Link + This is a working site