diff --git a/.gitattributes b/.gitattributes index 475f5f2..89eb187 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4,3 +4,4 @@ /.gitignore export-ignore /.travis.yml export-ignore /.scrutinizer.yml export-ignore +/codecov.yml export-ignore diff --git a/.scrutinizer.yml b/.scrutinizer.yml index d1ebd80..051ef9a 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,69 +1,15 @@ inherit: true +build: + nodes: + analysis: + tests: + override: [php-scrutinizer-run] + checks: php: - verify_property_names: true - verify_argument_usable_as_reference: true - verify_access_scope_valid: true - useless_calls: true - use_statement_alias_conflict: true - variable_existence: true - unused_variables: true - unused_properties: true - unused_parameters: true - unused_methods: true - unreachable_code: true - too_many_arguments: true - sql_injection_vulnerabilities: true - simplify_boolean_return: true - side_effects_or_types: true - security_vulnerabilities: true - return_doc_comments: true - return_doc_comment_if_not_inferrable: true - require_scope_for_properties: true - require_scope_for_methods: true - require_php_tag_first: true - psr2_switch_declaration: true - psr2_class_declaration: true - property_assignments: true - prefer_while_loop_over_for_loop: true - precedence_mistakes: true - precedence_in_conditions: true - phpunit_assertions: true - php5_style_constructor: true - parse_doc_comments: true - parameter_non_unique: true - parameter_doc_comments: true - param_doc_comment_if_not_inferrable: true - optional_parameters_at_the_end: true - one_class_per_file: true - no_unnecessary_if: true - no_trailing_whitespace: true - no_property_on_interface: true - no_non_implemented_abstract_methods: true - no_error_suppression: true - no_duplicate_arguments: true - no_commented_out_code: true - newline_at_end_of_file: true - missing_arguments: true - method_calls_on_non_object: true - instanceof_class_exists: true - foreach_traversable: true - fix_line_ending: true - fix_doc_comments: true - duplication: true - deprecated_code_usage: true - deadlock_detection_in_loops: true code_rating: true - closure_use_not_conflicting: true - catch_class_exists: true - blank_line_after_namespace_declaration: false - avoid_multiple_statements_on_same_line: true - avoid_duplicate_types: true - avoid_conflicting_incrementers: true - avoid_closing_tag: true - assignment_of_null_return: true - argument_type_checks: true + duplication: true filter: - paths: [code/*, tests/*] + paths: [src/*, tests/*] diff --git a/.travis.yml b/.travis.yml index a237a89..adfb908 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,34 +1,35 @@ -# See https://github.com/silverstripe/silverstripe-travis-support for setup details - -sudo: false - language: php -php: - - 5.3 - - 5.4 - - 5.5 - env: - - DB=MYSQL CORE_RELEASE=3.5 + global: + - COMPOSER_ROOT_VERSION=2.0.x-dev matrix: include: - php: 5.6 - env: DB=MYSQL CORE_RELEASE=3 - - php: 5.6 - env: DB=MYSQL CORE_RELEASE=3.1 - - php: 5.6 - env: DB=PGSQL CORE_RELEASE=3.2 + env: DB=MYSQL PHPCS_TEST=1 PHPUNIT_TEST=1 + - php: 7.0 + env: DB=MYSQL PHPUNIT_TEST=1 - php: 7.1 - env: DB=MYSQL CORE_RELEASE=3.6 + env: DB=PGSQL PHPUNIT_COVERAGE_TEST=1 + - php: 7.2 + env: DB=MYSQL PHPUNIT_TEST=1 before_script: - - composer self-update || true - - git clone git://github.com/silverstripe/silverstripe-travis-support.git ~/travis-support - - php ~/travis-support/travis_setup.php --source `pwd` --target ~/builds/ss - - cd ~/builds/ss - - composer install + # Init PHP + - phpenv rehash + - phpenv config-rm xdebug.ini + + # Install composer dependencies + - composer validate + - composer require --no-update silverstripe/installer 4.0.x-dev + - if [[ $DB == PGSQL ]]; then composer require --no-update silverstripe/postgresql 2.0.x-dev; fi + - composer install --prefer-dist --no-interaction --no-progress --no-suggest --optimize-autoloader --verbose --profile script: - - vendor/bin/phpunit externallinks/tests + - if [[ $PHPUNIT_TEST ]]; then vendor/bin/phpunit; fi + - if [[ $PHPUNIT_COVERAGE_TEST ]]; then phpdbg -qrr vendor/bin/phpunit --coverage-clover=coverage.xml; fi + - if [[ $PHPCS_TEST ]]; then vendor/bin/phpcs src/ tests/ *.php; fi + +after_success: + - if [[ $PHPUNIT_COVERAGE_TEST ]]; then bash <(curl -s https://codecov.io/bash) -f coverage.xml; fi diff --git a/.upgrade.yml b/.upgrade.yml new file mode 100644 index 0000000..5bad3f1 --- /dev/null +++ b/.upgrade.yml @@ -0,0 +1,10 @@ +mappings: + CMSExternalLinks_Controller: SilverStripe\ExternalLinks\Controllers\CMSExternalLinksController + CheckExternalLinksJob: SilverStripe\ExternalLinks\Jobs\CheckExternalLinksJob + BrokenExternalLink: SilverStripe\ExternalLinks\Model\BrokenExternalLink + BrokenExternalPageTrack: SilverStripe\ExternalLinks\Model\BrokenExternalPageTrack + BrokenExternalPageTrackStatus: SilverStripe\ExternalLinks\Model\BrokenExternalPageTrackStatus + BrokenExternalLinksReport: SilverStripe\ExternalLinks\Reports\BrokenExternalLinksReport + CheckExternalLinksTask: SilverStripe\ExternalLinks\Tasks\CheckExternalLinksTask + CurlLinkChecker: SilverStripe\ExternalLinks\Tasks\CurlLinkChecker + LinkChecker: SilverStripe\ExternalLinks\Tasks\LinkChecker diff --git a/README.md b/README.md index f657928..5e2f450 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # External links -[![Build Status](https://travis-ci.org/silverstripe/silverstripe-externallinks.svg?branch=master)](https://travis-ci.org/silverstripe/silverstripe-externallinks) +[![Build Status](http://img.shields.io/travis/silverstripe/silverstripe-externallinks.svg?style=flat)](https://travis-ci.org/silverstripe/silverstripe-externallinks) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/silverstripe/silverstripe-externallinks/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/silverstripe/silverstripe-externallinks/?branch=master) +[![codecov](https://codecov.io/gh/silverstripe/silverstripe-externallinks/branch/master/graph/badge.svg)](https://codecov.io/gh/silverstripe/silverstripe-externallinks) ## Introduction @@ -12,21 +14,19 @@ The external links module is a task and ModelAdmin to track and to report on bro ## Requirements - * SilverStripe 3.1 + +* SilverStripe ^4.0 + +**Note:** For a SilverStripe 3.x compatible version, please use [the 1.x release line](https://github.com/silverstripe/silverstripe-externallinks/tree/1.0). ## Features * Add external links to broken links reports * Add a task to track external broken links -See the [changelog](CHANGELOG.md) for version history. - ## Installation - 1. If you have composer you can use `composer require silverstripe/externallinks:*`. Otherwise, - download the module from GitHub and extract to the 'externallinks' folder. Place this directory - in your sites root directory. This is the one with framework and cms in it. - 2. Run in your browser - `/dev/build` to rebuild the database. + 1. Require the module via composer: `composer require silverstripe/externallinks` + 2. Run `/dev/build` in your browser to rebuild the database. 3. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links @@ -63,20 +63,17 @@ broken links. ## Queued job ## -If you have the queuedjobs module installed you can set the task to be run every so ofter -Add the following yml config to config.yml in mysite/_config have the the task run once every day (86400 seconds) - - CheckExternalLinks: - Delay: 86400 +If you have the queuedjobs module installed you can set the task to be run every so often. ## Whitelisting codes ## If you want to ignore or whitelist certain http codes this can be setup via IgnoreCodes in the config.yml -file in mysite/_config +file in `mysite/_config` - CheckExternalLinks: - Delay: 60 +```yml + SilverStripe\ExternalLinks\Tasks\CheckExternalLinksTask: IgnoreCodes: - 401 - 403 - 501 +``` diff --git a/_config/injector.yml b/_config/injector.yml index ab86975..4d890b6 100644 --- a/_config/injector.yml +++ b/_config/injector.yml @@ -1,5 +1,9 @@ --- Name: externallinksdependencies --- -Injector: - LinkChecker: CurlLinkChecker +SilverStripe\Core\Injector\Injector: + SilverStripe\ExternalLinks\Tasks\LinkChecker: SilverStripe\ExternalLinks\Tasks\CurlLinkChecker + Psr\SimpleCache\CacheInterface.CurlLinkChecker: + factory: SilverStripe\Core\Cache\CacheFactory + constructor: + namespace: 'curllinkchecker' diff --git a/_config/routes.yml b/_config/routes.yml index 2284d3a..2ac22a9 100644 --- a/_config/routes.yml +++ b/_config/routes.yml @@ -1,7 +1,7 @@ --- -Name: externallink -After: framework/routes +Name: externallinkroutes +Before: '#adminroutes' --- -Director: +SilverStripe\Control\Director: rules: - 'admin/externallinks//$Action': 'CMSExternalLinks_Controller' + 'admin/externallinks//$Action': SilverStripe\ExternalLinks\Controllers\CMSExternalLinksController diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php deleted file mode 100644 index 2b0efb6..0000000 --- a/code/controllers/CMSExternalLinks.php +++ /dev/null @@ -1,54 +0,0 @@ -response); - $this->response - ->addHeader('Content-Type', 'application/json') - ->addHeader('Content-Encoding', 'UTF-8') - ->addHeader('X-Content-Type-Options', 'nosniff'); - - // Format status - $track = BrokenExternalPageTrackStatus::get_latest(); - if($track) return json_encode(array( - 'TrackID' => $track->ID, - 'Status' => $track->Status, - 'Completed' => $track->getCompletedPages(), - 'Total' => $track->getTotalPages() - )); - } - - - /* - * Starts a broken external link check - */ - public function start() { - // return if the a job is already running - $status = BrokenExternalPageTrackStatus::get_latest(); - if ($status && $status->Status == 'Running') return; - - // Create a new job - if (class_exists('QueuedJobService')) { - // Force the creation of a new run - BrokenExternalPageTrackStatus::create_status(); - $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService')->queueJob($checkLinks); - } else { - //TODO this hangs as it waits for the connection to be released - // should return back and continue processing - // http://us3.php.net/manual/en/features.connection-handling.php - $task = CheckExternalLinksTask::create(); - $task->runLinksCheck(); - } - } -} diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php deleted file mode 100644 index 3f4311a..0000000 --- a/code/jobs/CheckExternalLinksJob.php +++ /dev/null @@ -1,34 +0,0 @@ -runLinksCheck(1); - $this->currentStep = $track->CompletedPages; - $this->totalSteps = $track->TotalPages; - $this->isComplete = $track->Status === 'Completed'; - } - -} diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php deleted file mode 100644 index 18c484f..0000000 --- a/code/model/BrokenExternalLink.php +++ /dev/null @@ -1,71 +0,0 @@ - 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. - 'HTTPCode' =>'Int' - ); - - private static $has_one = array( - 'Track' => 'BrokenExternalPageTrack', - 'Status' => 'BrokenExternalPageTrackStatus' - ); - - private static $summary_fields = array( - 'Created' => 'Checked', - 'Link' => 'External Link', - 'HTTPCodeDescription' => 'HTTP Error Code', - 'Page.Title' => 'Page link is on' - ); - - private static $searchable_fields = array( - 'HTTPCode' => array('title' => 'HTTP Code') - ); - - /** - * @return SiteTree - */ - public function Page() { - return $this->Track()->Page(); - } - - public function canEdit($member = false) { - return false; - } - - public function canView($member = false) { - $member = $member ? $member : Member::currentUser(); - $codes = array('content-authors', 'administrators'); - return Permission::checkMember($member, $codes); - } - - /** - * Retrieve a human readable description of a response code - * - * @return string - */ - public function getHTTPCodeDescription() { - $code = $this->HTTPCode; - if(empty($code)) { - // Assume that $code = 0 means there was no response - $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); - } elseif( - ($descriptions = Config::inst()->get('SS_HTTPResponse', 'status_codes')) - && isset($descriptions[$code]) - ) { - $description = $descriptions[$code]; - } else { - $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); - } - return sprintf("%d (%s)", $code, $description); - } -} - - diff --git a/code/model/BrokenExternalPageTrack.php b/code/model/BrokenExternalPageTrack.php deleted file mode 100644 index 828b541..0000000 --- a/code/model/BrokenExternalPageTrack.php +++ /dev/null @@ -1,28 +0,0 @@ - 'Boolean' - ); - - private static $has_one = array( - 'Page' => 'SiteTree', - 'Status' => 'BrokenExternalPageTrackStatus' - ); - - private static $has_many = array( - 'BrokenLinks' => 'BrokenExternalLink' - ); - - /** - * @return SiteTree - */ - public function Page() { - return Versioned::get_by_stage('SiteTree', 'Stage') - ->byID($this->PageID); - } -} diff --git a/code/model/BrokenExternalPageTrackStatus.php b/code/model/BrokenExternalPageTrackStatus.php deleted file mode 100644 index 141b996..0000000 --- a/code/model/BrokenExternalPageTrackStatus.php +++ /dev/null @@ -1,128 +0,0 @@ - 'Enum("Completed, Running", "Running")', - 'JobInfo' => 'Varchar(255)' - ); - - private static $has_many = array( - 'TrackedPages' => 'BrokenExternalPageTrack', - 'BrokenLinks' => 'BrokenExternalLink' - ); - - /** - * Get the latest track status - * - * @return self - */ - public static function get_latest() { - return self::get() - ->sort('ID', 'DESC') - ->first(); - } - - /** - * Gets the list of Pages yet to be checked - * - * @return DataList - */ - public function getIncompletePageList() { - $pageIDs = $this - ->getIncompleteTracks() - ->column('PageID'); - if($pageIDs) return Versioned::get_by_stage('SiteTree', 'Stage') - ->byIDs($pageIDs); - } - - /** - * Get the list of incomplete BrokenExternalPageTrack - * - * @return DataList - */ - public function getIncompleteTracks() { - return $this - ->TrackedPages() - ->filter('Processed', 0); - } - - /** - * Get total pages count - */ - public function getTotalPages() { - return $this->TrackedPages()->count(); - } - - /** - * Get completed pages count - */ - public function getCompletedPages() { - return $this - ->TrackedPages() - ->filter('Processed', 1) - ->count(); - } - - /** - * Returns the latest run, or otherwise creates a new one - * - * @return self - */ - public static function get_or_create() { - // Check the current status - $status = self::get_latest(); - if ($status && $status->Status == 'Running') { - $status->updateStatus(); - return $status; - } - - return self::create_status(); - } - - /* - * Create and prepare a new status - * - * @return self - */ - public static function create_status() { - // If the script is to be started create a new status - $status = self::create(); - $status->updateJobInfo('Creating new tracking object'); - - // Setup all pages to test - $pageIDs = Versioned::get_by_stage('SiteTree', 'Stage') - ->column('ID'); - foreach ($pageIDs as $pageID) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $pageID; - $trackPage->StatusID = $status->ID; - $trackPage->write(); - } - - return $status; - } - - public function updateJobInfo($message) { - $this->JobInfo = $message; - $this->write(); - } - - /** - * Self check status - */ - public function updateStatus() { - if ($this->CompletedPages == $this->TotalPages) { - $this->Status = 'Completed'; - $this->updateJobInfo('Setting to completed'); - } - } -} \ No newline at end of file diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php deleted file mode 100644 index 233d898..0000000 --- a/code/reports/BrokenExternalLinksReport.php +++ /dev/null @@ -1,83 +0,0 @@ - "Checked", - 'Link' => array( - 'title' => 'External Link', - 'formatting' => function($value, $item) { - return sprintf( - '%s', - Convert::raw2att($item->Link), - Convert::raw2xml($item->Link) - ); - } - ), - 'HTTPCodeDescription' => 'HTTP Error Code', - "Title" => array( - "title" => 'Page link is on', - 'formatting' => function($value, $item) { - $page = $item->Page(); - return sprintf( - '%s', - Convert::raw2att($page->CMSEditLink()), - Convert::raw2xml($page->Title) - ); - } - ) - ); - } - - /** - * Alias of columns(), to support the export to csv action - * in {@link GridFieldExportButton} generateExportFileData method. - * @return array - */ - public function getColumns() { - return $this->columns(); - } - - public function sourceRecords() { - $track = BrokenExternalPageTrackStatus::get_latest(); - if ($track) return $track->BrokenLinks(); - return new ArrayList(); - } - - public function getCMSFields() { - Requirements::javascript('externallinks/javascript/BrokenExternalLinksReport.js'); - $fields = parent::getCMSFields(); - - $reportResultSpan = '

'; - $reportResult = new LiteralField('ResultTitle', $reportResultSpan); - $fields->push($reportResult); - - $button = ''; - $runReportButton = new LiteralField( - 'runReport', - sprintf( - $button, - _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') - ) - ); - $fields->push($runReportButton); - - return $fields; - } -} diff --git a/code/tasks/CheckExternalLinksTask.php b/code/tasks/CheckExternalLinksTask.php deleted file mode 100644 index 6c3fadf..0000000 --- a/code/tasks/CheckExternalLinksTask.php +++ /dev/null @@ -1,188 +0,0 @@ - '%$LinkChecker' - ); - - /** - * @var bool - */ - protected $silent = false; - - /** - * @var LinkChecker - */ - protected $linkChecker; - - protected $title = 'Checking broken External links in the SiteTree'; - - protected $description = 'A task that records external broken links in the SiteTree'; - - protected $enabled = true; - - /** - * Log a message - * - * @param string $message - */ - protected function log($message) { - if(!$this->silent) Debug::message($message); - } - - public function run($request) { - $this->runLinksCheck(); - } - /** - * Turn on or off message output - * - * @param bool $silent - */ - public function setSilent($silent) { - $this->silent = $silent; - } - - /** - * @param LinkChecker $linkChecker - */ - public function setLinkChecker(LinkChecker $linkChecker) { - $this->linkChecker = $linkChecker; - } - - /** - * @return LinkChecker - */ - public function getLinkChecker() { - return $this->linkChecker; - } - - /** - * Check the status of a single link on a page - * - * @param BrokenExternalPageTrack $pageTrack - * @param DOMNode $link - */ - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) { - $class = $link->getAttribute('class'); - $href = $link->getAttribute('href'); - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); - - // Check link - $httpCode = $this->linkChecker->checkLink($href); - if($httpCode === null) return; // Null link means uncheckable, such as an internal link - - // If this code is broken then mark as such - if($foundBroken = $this->isCodeBroken($httpCode)) { - // Create broken record - $brokenLink = new BrokenExternalLink(); - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->TrackID = $pageTrack->ID; - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons - $brokenLink->write(); - } - - // Check if we need to update CSS class, otherwise return - if($markedBroken == $foundBroken) return; - if($foundBroken) { - $class .= ' ss-broken'; - } else { - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); - } - $link->setAttribute('class', trim($class)); - } - - /** - * Determine if the given HTTP code is "broken" - * - * @param int $httpCode - * @return bool True if this is a broken code - */ - protected function isCodeBroken($httpCode) { - // Null represents no request attempted - if($httpCode === null) return false; - - // do we have any whitelisted codes - $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); - if(is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) return false; - - // Check if code is outside valid range - return $httpCode < 200 || $httpCode > 302; - } - - /** - * Runs the links checker and returns the track used - * - * @param int $limit Limit to number of pages to run, or null to run all - * @return BrokenExternalPageTrackStatus - */ - public function runLinksCheck($limit = null) { - // Check the current status - $status = BrokenExternalPageTrackStatus::get_or_create(); - - // Calculate pages to run - $pageTracks = $status->getIncompleteTracks(); - if($limit) $pageTracks = $pageTracks->limit($limit); - - // Check each page - foreach ($pageTracks as $pageTrack) { - // Flag as complete - $pageTrack->Processed = 1; - $pageTrack->write(); - - // Check value of html area - $page = $pageTrack->Page(); - $this->log("Checking {$page->Title}"); - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); - if (!$htmlValue->isValid()) continue; - - // Check each link - $links = $htmlValue->getElementsByTagName('a'); - foreach($links as $link) { - $this->checkPageLink($pageTrack, $link); - } - - // Update content of page based on link fixes / breakages - $htmlValue->saveHTML(); - $page->Content = $htmlValue->getContent(); - $page->write(); - - // Once all links have been created for this page update HasBrokenLinks - $count = $pageTrack->BrokenLinks()->count(); - $this->log("Found {$count} broken links"); - if($count) { - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true - DB::query(sprintf( - 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', - intval($pageTrack->ID) - )); - } - } - - $status->updateJobInfo('Updating completed pages'); - $status->updateStatus(); - return $status; - } - - private function updateCompletedPages($trackID = 0) { - $noPages = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $trackID, - 'Processed' => 1 - )) - ->count(); - $track = BrokenExternalPageTrackStatus::get_latest(); - $track->CompletedPages = $noPages; - $track->write(); - return $noPages; - } - - private function updateJobInfo($message) { - $track = BrokenExternalPageTrackStatus::get_latest(); - if($track) { - $track->JobInfo = $message; - $track->write(); - } - } -} diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php deleted file mode 100644 index a84439d..0000000 --- a/code/tasks/CurlLinkChecker.php +++ /dev/null @@ -1,49 +0,0 @@ - true) - ); - } - - /** - * Determine the http status code for a given link - * - * @param string $href URL to check - * @return int HTTP status code, or null if not checkable (not a link) - */ - public function checkLink($href) { - // Skip non-external links - if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; - - // Check if we have a cached result - $cacheKey = md5($href); - $result = $this->getCache()->load($cacheKey); - if($result !== false) return $result; - - // No cached result so just request - $handle = curl_init($href); - curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($handle, CURLOPT_TIMEOUT, 10); - curl_exec($handle); - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); - curl_close($handle); - - // Cache result - $this->getCache()->save($httpCode, $cacheKey); - return $httpCode; - } -} diff --git a/code/tasks/LinkChecker.php b/code/tasks/LinkChecker.php deleted file mode 100644 index 629f177..0000000 --- a/code/tasks/LinkChecker.php +++ /dev/null @@ -1,15 +0,0 @@ - + + CodeSniffer ruleset for SilverStripe coding conventions. + + + + + + + diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..b543ea6 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,13 @@ + + + tests/ + + + + src/ + + tests/ + + + + diff --git a/src/Controllers/CMSExternalLinksController.php b/src/Controllers/CMSExternalLinksController.php new file mode 100644 index 0000000..7f35df1 --- /dev/null +++ b/src/Controllers/CMSExternalLinksController.php @@ -0,0 +1,73 @@ +response); + $this->response + ->addHeader('Content-Type', 'application/json') + ->addHeader('Content-Encoding', 'UTF-8') + ->addHeader('X-Content-Type-Options', 'nosniff'); + + // Format status + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + return json_encode([ + 'TrackID' => $track->ID, + 'Status' => $track->Status, + 'Completed' => $track->getCompletedPages(), + 'Total' => $track->getTotalPages() + ]); + } + } + + + /** + * Starts a broken external link check + */ + public function start() + { + // return if the a job is already running + $status = BrokenExternalPageTrackStatus::get_latest(); + if ($status && $status->Status == 'Running') { + return; + } + + // Create a new job + if (class_exists(QueuedJobService::class)) { + // Force the creation of a new run + BrokenExternalPageTrackStatus::create_status(); + $checkLinks = new CheckExternalLinksJob(); + singleton(QueuedJobService::class)->queueJob($checkLinks); + } else { + //TODO this hangs as it waits for the connection to be released + // should return back and continue processing + // http://us3.php.net/manual/en/features.connection-handling.php + $task = CheckExternalLinksTask::create(); + $task->runLinksCheck(); + } + } +} diff --git a/src/Jobs/CheckExternalLinksJob.php b/src/Jobs/CheckExternalLinksJob.php new file mode 100644 index 0000000..642ce3e --- /dev/null +++ b/src/Jobs/CheckExternalLinksJob.php @@ -0,0 +1,46 @@ +runLinksCheck(1); + $this->currentStep = $track->CompletedPages; + $this->totalSteps = $track->TotalPages; + $this->isComplete = $track->Status === 'Completed'; + } +} diff --git a/src/Model/BrokenExternalLink.php b/src/Model/BrokenExternalLink.php new file mode 100644 index 0000000..0aa2037 --- /dev/null +++ b/src/Model/BrokenExternalLink.php @@ -0,0 +1,85 @@ + 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. + 'HTTPCode' =>'Int' + ); + + private static $has_one = array( + 'Track' => BrokenExternalPageTrack::class, + 'Status' => BrokenExternalPageTrackStatus::class + ); + + private static $summary_fields = array( + 'Created' => 'Checked', + 'Link' => 'External Link', + 'HTTPCodeDescription' => 'HTTP Error Code', + 'Page.Title' => 'Page link is on' + ); + + private static $searchable_fields = array( + 'HTTPCode' => array('title' => 'HTTP Code') + ); + + /** + * @return SiteTree + */ + public function Page() + { + return $this->Track()->Page(); + } + + public function canEdit($member = false) + { + return false; + } + + public function canView($member = false) + { + $member = $member ? $member : Member::currentUser(); + $codes = array('content-authors', 'administrators'); + return Permission::checkMember($member, $codes); + } + + /** + * Retrieve a human readable description of a response code + * + * @return string + */ + public function getHTTPCodeDescription() + { + $code = $this->HTTPCode; + + try { + $response = HTTPResponse::create('', $code); + // Assume that $code = 0 means there was no response + $description = $code ? + $response->getStatusDescription() : + _t(__CLASS__ . '.NOTAVAILABLE', 'Server Not Available'); + } catch (InvalidArgumentException $e) { + $description = _t(__CLASS__ . '.UNKNOWNRESPONSE', 'Unknown Response Code'); + } + + return sprintf("%d (%s)", $code, $description); + } +} diff --git a/src/Model/BrokenExternalPageTrack.php b/src/Model/BrokenExternalPageTrack.php new file mode 100644 index 0000000..d1b257f --- /dev/null +++ b/src/Model/BrokenExternalPageTrack.php @@ -0,0 +1,39 @@ + 'Boolean' + ); + + private static $has_one = array( + 'Page' => SiteTree::class, + 'Status' => BrokenExternalPageTrackStatus::class + ); + + private static $has_many = array( + 'BrokenLinks' => BrokenExternalLink::class + ); + + /** + * @return SiteTree + */ + public function Page() + { + return Versioned::get_by_stage(SiteTree::class, 'Stage') + ->byID($this->PageID); + } +} diff --git a/src/Model/BrokenExternalPageTrackStatus.php b/src/Model/BrokenExternalPageTrackStatus.php new file mode 100644 index 0000000..c34596e --- /dev/null +++ b/src/Model/BrokenExternalPageTrackStatus.php @@ -0,0 +1,153 @@ + 'Enum("Completed, Running", "Running")', + 'JobInfo' => 'Varchar(255)' + ); + + private static $has_many = array( + 'TrackedPages' => BrokenExternalPageTrack::class, + 'BrokenLinks' => BrokenExternalLink::class + ); + + /** + * Get the latest track status + * + * @return BrokenExternalPageTrackStatus + */ + public static function get_latest() + { + return self::get() + ->sort('ID', 'DESC') + ->first(); + } + + /** + * Gets the list of Pages yet to be checked + * + * @return DataList + */ + public function getIncompletePageList() + { + $pageIDs = $this + ->getIncompleteTracks() + ->column('PageID'); + if ($pageIDs) { + return Versioned::get_by_stage(SiteTree::class, 'Stage') + ->byIDs($pageIDs); + } + } + + /** + * Get the list of incomplete BrokenExternalPageTrack + * + * @return DataList + */ + public function getIncompleteTracks() + { + return $this + ->TrackedPages() + ->filter('Processed', 0); + } + + /** + * Get total pages count + * + * @return int + */ + public function getTotalPages() + { + return $this->TrackedPages()->count(); + } + + /** + * Get completed pages count + * + * @return int + */ + public function getCompletedPages() + { + return $this + ->TrackedPages() + ->filter('Processed', 1) + ->count(); + } + + /** + * Returns the latest run, or otherwise creates a new one + * + * @return BrokenExternalPageTrackStatus + */ + public static function get_or_create() + { + // Check the current status + $status = self::get_latest(); + if ($status && $status->Status == 'Running') { + $status->updateStatus(); + return $status; + } + + return self::create_status(); + } + + /** + * Create and prepare a new status + * + * @return BrokenExternalPageTrackStatus + */ + public static function create_status() + { + // If the script is to be started create a new status + $status = self::create(); + $status->updateJobInfo('Creating new tracking object'); + + // Setup all pages to test + $pageIDs = Versioned::get_by_stage(SiteTree::class, 'Stage') + ->column('ID'); + foreach ($pageIDs as $pageID) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $pageID; + $trackPage->StatusID = $status->ID; + $trackPage->write(); + } + + return $status; + } + + public function updateJobInfo($message) + { + $this->JobInfo = $message; + $this->write(); + } + + /** + * Self check status + */ + public function updateStatus() + { + if ($this->CompletedPages == $this->TotalPages) { + $this->Status = 'Completed'; + $this->updateJobInfo('Setting to completed'); + } + } +} diff --git a/src/Reports/BrokenExternalLinksReport.php b/src/Reports/BrokenExternalLinksReport.php new file mode 100644 index 0000000..fec065a --- /dev/null +++ b/src/Reports/BrokenExternalLinksReport.php @@ -0,0 +1,102 @@ + "Checked", + 'Link' => array( + 'title' => 'External Link', + 'formatting' => function ($value, $item) { + return sprintf( + '%s', + Convert::raw2att($item->Link), + Convert::raw2xml($item->Link) + ); + } + ), + 'HTTPCodeDescription' => 'HTTP Error Code', + "Title" => array( + "title" => 'Page link is on', + 'formatting' => function ($value, $item) { + $page = $item->Page(); + return sprintf( + '%s', + Convert::raw2att($page->CMSEditLink()), + Convert::raw2xml($page->Title) + ); + } + ) + ); + } + + /** + * Alias of columns(), to support the export to csv action + * in {@link GridFieldExportButton} generateExportFileData method. + * @return array + */ + public function getColumns() + { + return $this->columns(); + } + + public function sourceRecords() + { + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + return $track->BrokenLinks(); + } + return ArrayList::create(); + } + + public function getCMSFields() + { + Requirements::javascript('silverstripe/externallinks: javascript/BrokenExternalLinksReport.js'); + $fields = parent::getCMSFields(); + + $reportResultSpan = '

'; + $reportResult = LiteralField::create('ResultTitle', $reportResultSpan); + $fields->push($reportResult); + + $button = HTML::createTag( + 'button', + [ + 'id' => 'externalLinksReport', + 'type' => 'button', + 'class' => 'btn btn-primary' + ], + _t(__CLASS__ . '.RUNREPORT', 'Create new report') + ); + $runReportButton = LiteralField::create('runReport', $button); + $fields->push($runReportButton); + + return $fields; + } +} diff --git a/src/Tasks/CheckExternalLinksTask.php b/src/Tasks/CheckExternalLinksTask.php new file mode 100644 index 0000000..84a8551 --- /dev/null +++ b/src/Tasks/CheckExternalLinksTask.php @@ -0,0 +1,230 @@ + '%$' . LinkChecker::class + ]; + + /** + * @var bool + */ + protected $silent = false; + + /** + * @var LinkChecker + */ + protected $linkChecker; + + protected $title = 'Checking broken External links in the SiteTree'; + + protected $description = 'A task that records external broken links in the SiteTree'; + + protected $enabled = true; + + /** + * Log a message + * + * @param string $message + */ + protected function log($message) + { + if (!$this->silent) { + Debug::message($message); + } + } + + public function run($request) + { + $this->runLinksCheck(); + } + /** + * Turn on or off message output + * + * @param bool $silent + */ + public function setSilent($silent) + { + $this->silent = $silent; + } + + /** + * @param LinkChecker $linkChecker + */ + public function setLinkChecker(LinkChecker $linkChecker) + { + $this->linkChecker = $linkChecker; + } + + /** + * @return LinkChecker + */ + public function getLinkChecker() + { + return $this->linkChecker; + } + + /** + * Check the status of a single link on a page + * + * @param BrokenExternalPageTrack $pageTrack + * @param DOMNode $link + */ + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) + { + $class = $link->getAttribute('class'); + $href = $link->getAttribute('href'); + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); + + // Check link + $httpCode = $this->linkChecker->checkLink($href); + if ($httpCode === null) { + return; // Null link means uncheckable, such as an internal link + } + + // If this code is broken then mark as such + if ($foundBroken = $this->isCodeBroken($httpCode)) { + // Create broken record + $brokenLink = new BrokenExternalLink(); + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->TrackID = $pageTrack->ID; + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons + $brokenLink->write(); + } + + // Check if we need to update CSS class, otherwise return + if ($markedBroken == $foundBroken) { + return; + } + if ($foundBroken) { + $class .= ' ss-broken'; + } else { + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); + } + $link->setAttribute('class', trim($class)); + } + + /** + * Determine if the given HTTP code is "broken" + * + * @param int $httpCode + * @return bool True if this is a broken code + */ + protected function isCodeBroken($httpCode) + { + // Null represents no request attempted + if ($httpCode === null) { + return false; + } + + // do we have any whitelisted codes + $ignoreCodes = $this->config()->get('IgnoreCodes'); + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { + return false; + } + + // Check if code is outside valid range + return $httpCode < 200 || $httpCode > 302; + } + + /** + * Runs the links checker and returns the track used + * + * @param int $limit Limit to number of pages to run, or null to run all + * @return BrokenExternalPageTrackStatus + */ + public function runLinksCheck($limit = null) + { + // Check the current status + $status = BrokenExternalPageTrackStatus::get_or_create(); + + // Calculate pages to run + $pageTracks = $status->getIncompleteTracks(); + if ($limit) { + $pageTracks = $pageTracks->limit($limit); + } + + // Check each page + foreach ($pageTracks as $pageTrack) { + // Flag as complete + $pageTrack->Processed = 1; + $pageTrack->write(); + + // Check value of html area + $page = $pageTrack->Page(); + $this->log("Checking {$page->Title}"); + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) { + continue; + } + + // Check each link + $links = $htmlValue->getElementsByTagName('a'); + foreach ($links as $link) { + $this->checkPageLink($pageTrack, $link); + } + + // Update content of page based on link fixes / breakages + $htmlValue->saveHTML(); + $page->Content = $htmlValue->getContent(); + $page->write(); + + // Once all links have been created for this page update HasBrokenLinks + $count = $pageTrack->BrokenLinks()->count(); + $this->log("Found {$count} broken links"); + if ($count) { + $siteTreeTable = DataObject::getSchema()->tableName(SiteTree::class); + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true + DB::query(sprintf( + 'UPDATE "%s" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', + $siteTreeTable, + intval($pageTrack->ID) + )); + } + } + + $status->updateJobInfo('Updating completed pages'); + $status->updateStatus(); + return $status; + } + + private function updateCompletedPages($trackID = 0) + { + $noPages = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $trackID, + 'Processed' => 1 + )) + ->count(); + $track = BrokenExternalPageTrackStatus::get_latest(); + $track->CompletedPages = $noPages; + $track->write(); + return $noPages; + } + + private function updateJobInfo($message) + { + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + $track->JobInfo = $message; + $track->write(); + } + } +} diff --git a/src/Tasks/CurlLinkChecker.php b/src/Tasks/CurlLinkChecker.php new file mode 100644 index 0000000..0a11f3d --- /dev/null +++ b/src/Tasks/CurlLinkChecker.php @@ -0,0 +1,57 @@ +get(CacheInterface::class . '.CurlLinkChecker'); + } + + /** + * Determine the http status code for a given link + * + * @param string $href URL to check + * @return int HTTP status code, or null if not checkable (not a link) + */ + public function checkLink($href) + { + // Skip non-external links + if (!preg_match('/^https?[^:]*:\/\//', $href)) { + return null; + } + + // Check if we have a cached result + $cacheKey = md5($href); + $result = $this->getCache()->get($cacheKey); + if ($result !== false) { + return $result; + } + + // No cached result so just request + $handle = curl_init($href); + curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($handle, CURLOPT_TIMEOUT, 10); + curl_exec($handle); + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); + curl_close($handle); + + // Cache result + $this->getCache()->set($httpCode, $cacheKey); + return $httpCode; + } +} diff --git a/src/Tasks/LinkChecker.php b/src/Tasks/LinkChecker.php new file mode 100644 index 0000000..146458b --- /dev/null +++ b/src/Tasks/LinkChecker.php @@ -0,0 +1,18 @@ + array('Translatable') + protected static $fixture_file = 'ExternalLinksTest.yml'; + + protected static $extra_dataobjects = array( + ExternalLinksTestPage::class ); - public function setUpOnce() { - if (class_exists('Phockito')) { - Phockito::include_hamcrest(false); - } + protected function setUp() + { + parent::setUp(); - parent::setUpOnce(); - } + // Stub link checker + $checker = new PretendLinkChecker; + Injector::inst()->registerService($checker, LinkChecker::class); + } - public function setUp() { - parent::setUp(); + public function testLinks() + { + // Run link checker + $task = CheckExternalLinksTask::create(); + $task->setSilent(true); // Be quiet during the test! + $task->runLinksCheck(); - // Check dependencies - if (!class_exists('Phockito')) { - $this->skipTest = true; - return $this->markTestSkipped("These tests need the Phockito module installed to run"); - } + // Get all links checked + $status = BrokenExternalPageTrackStatus::get_latest(); + $this->assertEquals('Completed', $status->Status); + $this->assertEquals(5, $status->TotalPages); + $this->assertEquals(5, $status->CompletedPages); - // Mock link checker - $checker = Phockito::mock('LinkChecker'); - Phockito::when($checker) - ->checkLink('http://www.working.com') - ->return(200); + // Check all pages have had the correct HTML adjusted + for ($i = 1; $i <= 5; $i++) { + $page = $this->objFromFixture(ExternalLinksTestPage::class, 'page'.$i); + $this->assertNotEmpty($page->Content); + $this->assertEquals( + $page->ExpectedContent, + $page->Content, + "Assert that the content of page{$i} has been updated" + ); + } - Phockito::when($checker) - ->checkLink('http://www.broken.com/url/thing') // 404 on working site - ->return(404); + // Check that the correct report of broken links is generated + $links = $status + ->BrokenLinks() + ->sort('Link'); - Phockito::when($checker) - ->checkLink('http://www.broken.com') // 403 on working site - ->return(403); + $this->assertEquals(4, $links->count()); + $this->assertEquals( + array( + 'http://www.broken.com', + 'http://www.broken.com/url/thing', + 'http://www.broken.com/url/thing', + 'http://www.nodomain.com' + ), + array_values($links->map('ID', 'Link')->toArray()) + ); - Phockito::when($checker) - ->checkLink('http://www.nodomain.com') // no ping - ->return(0); + // Check response codes are correct + $expected = array( + 'http://www.broken.com' => 403, + 'http://www.broken.com/url/thing' => 404, + 'http://www.nodomain.com' => 0 + ); + $actual = $links->map('Link', 'HTTPCode')->toArray(); + $this->assertEquals($expected, $actual); - Phockito::when($checker) - ->checkLink('/internal/link') - ->return(null); + // Check response descriptions are correct + i18n::set_locale('en_NZ'); + $expected = array( + 'http://www.broken.com' => '403 (Forbidden)', + 'http://www.broken.com/url/thing' => '404 (Not Found)', + 'http://www.nodomain.com' => '0 (Server Not Available)' + ); + $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); + $this->assertEquals($expected, $actual); + } - Phockito::when($checker) - ->checkLink('[sitetree_link,id=9999]') - ->return(null); - - Phockito::when($checker) - ->checkLink('home') - ->return(null); - - Phockito::when($checker) - ->checkLink('broken-internal') - ->return(null); - - Phockito::when($checker) - ->checkLink('[sitetree_link,id=1]') - ->return(null); - - Phockito::when($checker) - ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 - ->return(404); - - Injector::inst()->registerService($checker, 'LinkChecker'); - } - - public function testLinks() { - // Run link checker - $task = CheckExternalLinksTask::create(); - $task->setSilent(true); // Be quiet during the test! - $task->runLinksCheck(); - - // Get all links checked - $status = BrokenExternalPageTrackStatus::get_latest(); - $this->assertEquals('Completed', $status->Status); - $this->assertEquals(5, $status->TotalPages); - $this->assertEquals(5, $status->CompletedPages); - - // Check all pages have had the correct HTML adjusted - for($i = 1; $i <= 5; $i++) { - $page = $this->objFromFixture('ExternalLinksTestPage', 'page'.$i); - $this->assertNotEmpty($page->Content); - $this->assertEquals( - $page->ExpectedContent, - $page->Content, - "Assert that the content of page{$i} has been updated" - ); - } - - // Check that the correct report of broken links is generated - $links = $status - ->BrokenLinks() - ->sort('Link'); - - $this->assertEquals(4, $links->count()); - $this->assertEquals( - array( - 'http://www.broken.com', - 'http://www.broken.com/url/thing', - 'http://www.broken.com/url/thing', - 'http://www.nodomain.com' - ), - array_values($links->map('ID', 'Link')->toArray()) - ); - - // Check response codes are correct - $expected = array( - 'http://www.broken.com' => 403, - 'http://www.broken.com/url/thing' => 404, - 'http://www.nodomain.com' => 0 - ); - $actual = $links->map('Link', 'HTTPCode')->toArray(); - $this->assertEquals($expected, $actual); - - // Check response descriptions are correct - i18n::set_locale('en_NZ'); - $expected = array( - 'http://www.broken.com' => '403 (Forbidden)', - 'http://www.broken.com/url/thing' => '404 (Not Found)', - 'http://www.nodomain.com' => '0 (Server Not Available)' - ); - $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); - $this->assertEquals($expected, $actual); - } - - /** - * Test that broken links appears in the reports list - */ - public function testReportExists() { - $reports = SS_Report::get_reports(); - $reportNames = array(); - foreach($reports as $report) { - $reportNames[] = $report->class; - } - $this->assertContains('BrokenExternalLinksReport',$reportNames, - 'BrokenExternalLinksReport is in reports list'); - } + /** + * Test that broken links appears in the reports list + */ + public function testReportExists() + { + $reports = Report::get_reports(); + $reportNames = array(); + foreach ($reports as $report) { + $reportNames[] = get_class($report); + } + $this->assertContains( + BrokenExternalLinksReport::class, + $reportNames, + 'BrokenExternalLinksReport is in reports list' + ); + } } diff --git a/tests/ExternalLinksTest.yml b/tests/ExternalLinksTest.yml index 7b026b2..59992d0 100644 --- a/tests/ExternalLinksTest.yml +++ b/tests/ExternalLinksTest.yml @@ -1,4 +1,4 @@ -ExternalLinksTestPage: +SilverStripe\ExternalLinks\Tests\ExternalLinksTestPage: # Tests mix of broken and working external links page1: Title: 'Page 1' diff --git a/tests/ExternalLinksTestPage.php b/tests/ExternalLinksTestPage.php index 69fd768..a4ce0e5 100644 --- a/tests/ExternalLinksTestPage.php +++ b/tests/ExternalLinksTestPage.php @@ -1,7 +1,14 @@ 'HTMLText' ); diff --git a/tests/Stubs/PretendLinkChecker.php b/tests/Stubs/PretendLinkChecker.php new file mode 100644 index 0000000..8701422 --- /dev/null +++ b/tests/Stubs/PretendLinkChecker.php @@ -0,0 +1,28 @@ +