Tests, refactor, cleanup, fix issues
This commit is contained in:
parent
f55a650d24
commit
a4ede246ab
|
@ -1 +0,0 @@
|
|||
<?php
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
Name: externallinksdependencies
|
||||
---
|
||||
Injector:
|
||||
LinkChecker: CurlLinkChecker
|
|
@ -10,13 +10,21 @@ class CMSExternalLinks_Controller extends Controller {
|
|||
* @return string JSON string detailing status of the job
|
||||
*/
|
||||
public function getJobStatus() {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
if (!$track || !$track->exists()) return null;
|
||||
echo json_encode(array(
|
||||
// Set headers
|
||||
HTTP::set_cache_age(0);
|
||||
HTTP::add_cache_headers($this->response);
|
||||
$this->response
|
||||
->addHeader('Content-Type', 'application/json')
|
||||
->addHeader('Content-Encoding', 'UTF-8')
|
||||
->addHeader('X-Content-Type-Options', 'nosniff');
|
||||
|
||||
// Format status
|
||||
$track = BrokenExternalPageTrackStatus::get_latest();
|
||||
if($track) return json_encode(array(
|
||||
'TrackID' => $track->ID,
|
||||
'Status' => $track->Status,
|
||||
'Completed' => $track->CompletedPages,
|
||||
'Total' => $track->TotalPages
|
||||
'Completed' => $track->getCompletedPages(),
|
||||
'Total' => $track->getTotalPages()
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -25,36 +33,22 @@ class CMSExternalLinks_Controller extends Controller {
|
|||
* Starts a broken external link check
|
||||
*/
|
||||
public function start() {
|
||||
$status = checkExternalLinks::getLatestTrackStatus();
|
||||
// return if the a job is already running
|
||||
if ($status == 'Running') {
|
||||
return;
|
||||
}
|
||||
$status = BrokenExternalPageTrackStatus::get_latest();
|
||||
if ($status && $status->Status == 'Running') return;
|
||||
|
||||
// Create a new job
|
||||
if (class_exists('QueuedJobService')) {
|
||||
$pages = Versioned::get_by_stage('SiteTree', 'Stage');
|
||||
$noPages = count($pages);
|
||||
|
||||
$track = BrokenExternalPageTrackStatus::create();
|
||||
$track->TotalPages = $noPages;
|
||||
$track->Status = 'Running';
|
||||
$track->write();
|
||||
|
||||
foreach ($pages as $page) {
|
||||
$trackPage = BrokenExternalPageTrack::create();
|
||||
$trackPage->PageID = $page->ID;
|
||||
$trackPage->TrackID = $track->ID;
|
||||
$trackPage->write();
|
||||
}
|
||||
|
||||
// Force the creation of a new run
|
||||
BrokenExternalPageTrackStatus::create_status();
|
||||
$checkLinks = new CheckExternalLinksJob();
|
||||
singleton('QueuedJobService')
|
||||
->queueJob($checkLinks, date('Y-m-d H:i:s', time() + 1));
|
||||
singleton('QueuedJobService')->queueJob($checkLinks);
|
||||
} else {
|
||||
//TODO this hangs as it waits for the connection to be released
|
||||
// should return back and continue processing
|
||||
// should return back and continue processing
|
||||
// http://us3.php.net/manual/en/features.connection-handling.php
|
||||
$task = new CheckExternalLinks();
|
||||
$task->run();
|
||||
$task = CheckExternalLinksTask::create();
|
||||
$task->runLinksCheck();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob implements QueuedJob {
|
|||
* Check an individual page
|
||||
*/
|
||||
public function process() {
|
||||
$task = new CheckExternalLinks();
|
||||
$task = CheckExternalLinksTask::create();
|
||||
$track = $task->runLinksCheck(1);
|
||||
$this->currentStep = $track->CompletedPages;
|
||||
$this->totalSteps = $track->TotalPages;
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Represents a single link checked for a single run that is broken
|
||||
*
|
||||
* @method BrokenExternalPageTrack Track()
|
||||
* @method BrokenExternalPageTrackStatus Status()
|
||||
*/
|
||||
class BrokenExternalLink extends DataObject {
|
||||
|
||||
private static $db = array(
|
||||
|
@ -8,10 +14,17 @@ class BrokenExternalLink extends DataObject {
|
|||
);
|
||||
|
||||
private static $has_one = array(
|
||||
'Page' => 'Page',
|
||||
'Track' => 'BrokenExternalLink'
|
||||
'Track' => 'BrokenExternalPageTrack',
|
||||
'Status' => 'BrokenExternalPageTrackStatus'
|
||||
);
|
||||
|
||||
/**
|
||||
* @return SiteTree
|
||||
*/
|
||||
public function Page() {
|
||||
return $this->Track()->Page();
|
||||
}
|
||||
|
||||
public static $summary_fields = array(
|
||||
'Page.Title' => 'Page',
|
||||
'HTTPCode' => 'HTTP Code',
|
||||
|
@ -33,22 +46,4 @@ class BrokenExternalLink extends DataObject {
|
|||
}
|
||||
}
|
||||
|
||||
class BrokenExternalPageTrackStatus extends DataObject {
|
||||
private static $db = array(
|
||||
'Status' => 'Enum("Completed, Running", "Running")',
|
||||
'TotalPages' => 'Int',
|
||||
'CompletedPages' => 'Int',
|
||||
'JobInfo' => 'Varchar(255)'
|
||||
);
|
||||
}
|
||||
|
||||
class BrokenExternalPageTrack extends DataObject {
|
||||
private static $db = array(
|
||||
'TrackID' => 'Int',
|
||||
'Processed' => 'Boolean'
|
||||
);
|
||||
|
||||
private static $has_one = array(
|
||||
'Page' => 'Page'
|
||||
);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Represents a track for a single page
|
||||
*/
|
||||
class BrokenExternalPageTrack extends DataObject {
|
||||
|
||||
private static $db = array(
|
||||
'Processed' => 'Boolean'
|
||||
);
|
||||
|
||||
private static $has_one = array(
|
||||
'Page' => 'SiteTree',
|
||||
'Status' => 'BrokenExternalPageTrackStatus'
|
||||
);
|
||||
|
||||
private static $has_many = array(
|
||||
'BrokenLinks' => 'BrokenExternalLink'
|
||||
);
|
||||
|
||||
/**
|
||||
* @return SiteTree
|
||||
*/
|
||||
public function Page() {
|
||||
return Versioned::get_by_stage('SiteTree', 'Stage')
|
||||
->byID($this->PageID);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Represents the status of a track run
|
||||
*
|
||||
* @method DataList TrackedPages()
|
||||
* @method DataList BrokenLinks()
|
||||
* @property int $TotalPages Get total pages count
|
||||
* @property int $CompletedPages Get completed pages count
|
||||
*/
|
||||
class BrokenExternalPageTrackStatus extends DataObject {
|
||||
|
||||
private static $db = array(
|
||||
'Status' => 'Enum("Completed, Running", "Running")',
|
||||
'JobInfo' => 'Varchar(255)'
|
||||
);
|
||||
|
||||
private static $has_many = array(
|
||||
'TrackedPages' => 'BrokenExternalPageTrack',
|
||||
'BrokenLinks' => 'BrokenExternalLink'
|
||||
);
|
||||
|
||||
/**
|
||||
* Get the latest track status
|
||||
*
|
||||
* @return self
|
||||
*/
|
||||
public static function get_latest() {
|
||||
return self::get()
|
||||
->sort('ID', 'DESC')
|
||||
->first();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the list of Pages yet to be checked
|
||||
*
|
||||
* @return DataList
|
||||
*/
|
||||
public function getIncompletePageList() {
|
||||
$pageIDs = $this
|
||||
->getIncompleteTracks()
|
||||
->column('PageID');
|
||||
if($pageIDs) return Versioned::get_by_stage('SiteTree', 'Stage')
|
||||
->byIDs($pageIDs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of incomplete BrokenExternalPageTrack
|
||||
*
|
||||
* @return DataList
|
||||
*/
|
||||
public function getIncompleteTracks() {
|
||||
return $this
|
||||
->TrackedPages()
|
||||
->filter('Processed', 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total pages count
|
||||
*/
|
||||
public function getTotalPages() {
|
||||
return $this->TrackedPages()->count();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get completed pages count
|
||||
*/
|
||||
public function getCompletedPages() {
|
||||
return $this
|
||||
->TrackedPages()
|
||||
->filter('Processed', 1)
|
||||
->count();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the latest run, or otherwise creates a new one
|
||||
*
|
||||
* @return self
|
||||
*/
|
||||
public static function get_or_create() {
|
||||
// Check the current status
|
||||
$status = self::get_latest();
|
||||
if ($status && $status->Status == 'Running') {
|
||||
$status->updateStatus();
|
||||
return $status;
|
||||
}
|
||||
|
||||
return self::create_status();
|
||||
}
|
||||
|
||||
/*
|
||||
* Create and prepare a new status
|
||||
*
|
||||
* @return self
|
||||
*/
|
||||
public static function create_status() {
|
||||
// If the script is to be started create a new status
|
||||
$status = self::create();
|
||||
$status->updateJobInfo('Creating new tracking object');
|
||||
|
||||
// Setup all pages to test
|
||||
$pageIDs = Versioned::get_by_stage('SiteTree', 'Stage')
|
||||
->column('ID');
|
||||
foreach ($pageIDs as $pageID) {
|
||||
$trackPage = BrokenExternalPageTrack::create();
|
||||
$trackPage->PageID = $pageID;
|
||||
$trackPage->StatusID = $status->ID;
|
||||
$trackPage->write();
|
||||
}
|
||||
|
||||
return $status;
|
||||
}
|
||||
|
||||
public function updateJobInfo($message) {
|
||||
$this->JobInfo = $message;
|
||||
$this->write();
|
||||
}
|
||||
|
||||
/**
|
||||
* Self check status
|
||||
*/
|
||||
public function updateStatus() {
|
||||
if ($this->CompletedPages == $this->TotalPages) {
|
||||
$this->Status = 'Completed';
|
||||
$this->updateJobInfo('Setting to completed');
|
||||
}
|
||||
}
|
||||
}
|
|
@ -8,43 +8,41 @@
|
|||
|
||||
class BrokenExternalLinksReport extends SS_Report {
|
||||
|
||||
/**
|
||||
* Columns in the report
|
||||
*
|
||||
* @var array
|
||||
* @config
|
||||
*/
|
||||
private static $columns = array(
|
||||
'Created' => 'Checked',
|
||||
'Link' => 'External Link',
|
||||
'HTTPCode' => 'HTTP Error Code',
|
||||
'PageLink' => array(
|
||||
'title' => 'Page link is on',
|
||||
'link' => true
|
||||
),
|
||||
);
|
||||
|
||||
public function init() {
|
||||
parent::init();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the report title
|
||||
*
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function title() {
|
||||
return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS',
|
||||
"External broken links report");
|
||||
return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', "External broken links report");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the column names of the report
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function columns() {
|
||||
return self::$columns;
|
||||
return array(
|
||||
"Created" => "Checked",
|
||||
'Link' => array(
|
||||
'title' => 'External Link',
|
||||
'formatting' => function($value, $item) {
|
||||
return sprintf(
|
||||
'<a target="_blank" href="%s">%s</a>',
|
||||
Convert::raw2att($item->Link),
|
||||
Convert::raw2xml($item->Link)
|
||||
);
|
||||
}
|
||||
),
|
||||
'HTTPCode' => 'HTTP Error Code',
|
||||
"Title" => array(
|
||||
"title" => 'Page link is on',
|
||||
'formatting' => function($value, $item) {
|
||||
$page = $item->Page();
|
||||
return sprintf(
|
||||
'<a href="%s">%s</a>',
|
||||
Convert::raw2att($page->CMSEditLink()),
|
||||
Convert::raw2xml($page->Title)
|
||||
);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -57,20 +55,9 @@ class BrokenExternalLinksReport extends SS_Report {
|
|||
}
|
||||
|
||||
public function sourceRecords() {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
$returnSet = new ArrayList();
|
||||
if ($track && $track->exists()) {
|
||||
$links = BrokenExternalLink::get()
|
||||
->filter('TrackID', $track->ID);
|
||||
} else {
|
||||
$links = BrokenExternalLink::get();
|
||||
}
|
||||
foreach ($links as $link) {
|
||||
$link->PageLink = $link->Page()->Title;
|
||||
$link->ID = $link->Page()->ID;
|
||||
$returnSet->push($link);
|
||||
}
|
||||
return $returnSet;
|
||||
$track = BrokenExternalPageTrackStatus::get_latest();
|
||||
if ($track) return $track->BrokenLinks();
|
||||
return new ArrayList();
|
||||
}
|
||||
|
||||
public function getCMSFields() {
|
||||
|
|
|
@ -1,218 +0,0 @@
|
|||
<?php
|
||||
|
||||
class CheckExternalLinks extends BuildTask {
|
||||
public $limit = 10;
|
||||
|
||||
protected $title = 'Checking broken External links in the SiteTree';
|
||||
|
||||
protected $description = 'A task that records external broken links in the SiteTree';
|
||||
|
||||
protected $enabled = true;
|
||||
|
||||
private $completedPages;
|
||||
private $totalPages;
|
||||
|
||||
function run($request) {
|
||||
$this->runLinksCheck($this->limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the links checker and returns the track used
|
||||
*
|
||||
* @param int $limit Limit to number of pages to run
|
||||
* @return BrokenExternalPageTrackStatus
|
||||
*/
|
||||
public function runLinksCheck($limit) {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
|
||||
// if the script has already been started
|
||||
if ($track && $track->Status == 'Running') {
|
||||
$batch = BrokenExternalPageTrack::get()
|
||||
->filter(array(
|
||||
'TrackID' => $track->ID,
|
||||
'Processed' => 0
|
||||
))->limit($limit)->column('PageID');
|
||||
$pages = Versioned::get_by_stage('SiteTree', 'Stage')
|
||||
->filter('ID', $batch)
|
||||
->limit($limit);
|
||||
$this->updateJobInfo('Fetching pages to check');
|
||||
if ($track->CompletedPages == $track->TotalPages) {
|
||||
$track->Status = 'Completed';
|
||||
$track->write();
|
||||
$this->updateJobInfo('Setting to completed');
|
||||
}
|
||||
// if the script is to be started
|
||||
} else {
|
||||
$pages = Versioned::get_by_stage('SiteTree', 'Stage')->column('ID');
|
||||
$noPages = count($pages);
|
||||
|
||||
$track = BrokenExternalPageTrackStatus::create();
|
||||
$track->TotalPages = $noPages;
|
||||
$track->write();
|
||||
$this->updateJobInfo('Creating new tracking object');
|
||||
|
||||
foreach ($pages as $page) {
|
||||
$trackPage = BrokenExternalPageTrack::create();
|
||||
$trackPage->PageID = $page;
|
||||
$trackPage->TrackID = $track->ID;
|
||||
$trackPage->write();
|
||||
}
|
||||
|
||||
$batch = BrokenExternalPageTrack::get()
|
||||
->filter(array(
|
||||
'TrackID' => $track->ID
|
||||
))->limit($limit)->column('PageID');
|
||||
|
||||
$pages = Versioned::get_by_stage('SiteTree', 'Stage')
|
||||
->filter('ID', $batch);
|
||||
}
|
||||
$trackID = $track->ID;
|
||||
foreach ($pages as $page) {
|
||||
++$this->totalPages;
|
||||
|
||||
if ($track->ID) {
|
||||
$trackPage = BrokenExternalPageTrack::get()
|
||||
->filter(array(
|
||||
'PageID' => $page->ID,
|
||||
'TrackID' => $track->ID
|
||||
))->first();
|
||||
$trackPage->Processed = 1;
|
||||
$trackPage->write();
|
||||
}
|
||||
|
||||
$htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
|
||||
if (!$htmlValue->isValid()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Populate link tracking for internal links & links to asset files.
|
||||
if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) {
|
||||
$class = $link->getAttribute('class');
|
||||
$pos = stripos($class, 'ss-broken');
|
||||
if ($pos !== false && $page->HasBrokenLink == 1) continue;
|
||||
|
||||
$href = Director::makeRelative($link->getAttribute('href'));
|
||||
if ($href == 'admin/') continue;
|
||||
|
||||
// ignore SiteTree, anchor and assets links as they will be caught
|
||||
// by SiteTreeLinkTracking
|
||||
if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) {
|
||||
continue;
|
||||
} else if (isset($href[0]) && $href[0] == '#') {
|
||||
continue;
|
||||
} else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if($href && function_exists('curl_init')) {
|
||||
$handle = curl_init($href);
|
||||
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
|
||||
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
|
||||
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
|
||||
$response = curl_exec($handle);
|
||||
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
|
||||
curl_close($handle);
|
||||
// do we have any whitelisted codes
|
||||
$ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes');
|
||||
// if the code is whitelisted set it to 200
|
||||
$httpCode = (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) ?
|
||||
200 : $httpCode;
|
||||
|
||||
// ignore empty hrefs and internal links
|
||||
if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) {
|
||||
$brokenLink = new BrokenExternalLink();
|
||||
$brokenLink->PageID = $page->ID;
|
||||
$brokenLink->Link = $href;
|
||||
$brokenLink->HTTPCode = $httpCode;
|
||||
$brokenLink->TrackID = $track->ID;
|
||||
$brokenLink->write();
|
||||
|
||||
// set the broken link class
|
||||
$class = ($class && stripos($class, 'ss-broken')) ?
|
||||
$class . ' ss-broken' : 'ss-broken';
|
||||
$link->setAttribute('class', ($class ? $class : 'ss-broken'));
|
||||
$htmlValue->__call('saveHTML', array());
|
||||
|
||||
$page->Content = $htmlValue->getContent();
|
||||
$page->owner->write();
|
||||
|
||||
if (!$page->HasBrokenLink) {
|
||||
|
||||
// bypass the ORM as syncLinkTracking does not allow you
|
||||
// to update HasBrokenLink to true
|
||||
$query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 ";
|
||||
$query .= "WHERE \"ID\" = " . (int)$page->ID;
|
||||
$result = DB::query($query);
|
||||
if (!$result) {
|
||||
$this->debugMessage('Error updating HasBrokenLink');
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
++$this->completedPages;
|
||||
}
|
||||
|
||||
// run this outside the foreach loop to stop it locking DB rows
|
||||
$this->updateJobInfo('Updating completed pages');
|
||||
$this->updateCompletedPages($trackID);
|
||||
|
||||
// do we need to carry on running the job
|
||||
$track = $this->getLatestTrack();
|
||||
if ($track->CompletedPages >= $track->TotalPages) {
|
||||
$track->Status = 'Completed';
|
||||
$track->write();
|
||||
|
||||
// clear any old previous data
|
||||
$rows = BrokenExternalPageTrack::get()
|
||||
->exclude('TrackID', $track->ID);
|
||||
foreach ($rows as $row) {
|
||||
$row->delete();
|
||||
}
|
||||
return $track;
|
||||
}
|
||||
|
||||
// if running via the queued job module return to the queued job after each iteration
|
||||
if ($limit == 1) {
|
||||
return $track;
|
||||
} else {
|
||||
$this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}");
|
||||
return $this->runLinksCheck($limit);
|
||||
}
|
||||
}
|
||||
|
||||
public static function getLatestTrack() {
|
||||
$track = BrokenExternalPageTrackStatus::get()->sort('ID', 'DESC')->first();
|
||||
if (!$track || !$track->exists()) return null;
|
||||
return $track;
|
||||
}
|
||||
|
||||
public static function getLatestTrackID() {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
if (!$track || !$track->exists()) return null;
|
||||
return $track->ID;
|
||||
}
|
||||
|
||||
public static function getLatestTrackStatus() {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
if (!$track || !$track->exists()) return null;
|
||||
return $track->Status;
|
||||
}
|
||||
|
||||
private function updateCompletedPages($trackID = 0) {
|
||||
$noPages = BrokenExternalPageTrack::get()
|
||||
->filter(array('TrackID' => $trackID, 'Processed' => 1))->count();
|
||||
$track = $this->getLatestTrack($trackID);
|
||||
$track->CompletedPages = $noPages;
|
||||
$track->write();
|
||||
return $noPages;
|
||||
}
|
||||
|
||||
private function updateJobInfo($message) {
|
||||
$track = CheckExternalLinks::getLatestTrack();
|
||||
if (!$track || !$track->exists()) return null;
|
||||
$track->JobInfo = $message;
|
||||
$track->write();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,204 @@
|
|||
<?php
|
||||
|
||||
class CheckExternalLinksTask extends BuildTask {
|
||||
|
||||
/**
|
||||
* Log a message
|
||||
*
|
||||
* @param string $message
|
||||
*/
|
||||
protected function log($message) {
|
||||
if(!$this->silent) Debug::message($message);
|
||||
}
|
||||
|
||||
/**
|
||||
* @var bool
|
||||
*/
|
||||
protected $silent = false;
|
||||
|
||||
/**
|
||||
* Turn on or off message output
|
||||
*
|
||||
* @param bool $silent
|
||||
*/
|
||||
public function setSilent($silent) {
|
||||
$this->silent = $silent;
|
||||
}
|
||||
|
||||
protected $title = 'Checking broken External links in the SiteTree';
|
||||
|
||||
protected $description = 'A task that records external broken links in the SiteTree';
|
||||
|
||||
protected $enabled = true;
|
||||
|
||||
private static $dependencies = array(
|
||||
'LinkChecker' => '%$LinkChecker'
|
||||
);
|
||||
|
||||
public function run($request) {
|
||||
$this->runLinksCheck();
|
||||
}
|
||||
|
||||
/**
|
||||
* @var LinkChecker
|
||||
*/
|
||||
protected $linkChecker;
|
||||
|
||||
/**
|
||||
* @param LinkChecker $linkChecker
|
||||
*/
|
||||
public function setLinkChecker(LinkChecker $linkChecker) {
|
||||
$this->linkChecker = $linkChecker;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return LinkChecker
|
||||
*/
|
||||
public function getLinkChecker() {
|
||||
return $this->linkChecker;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check the status of a single link on a page
|
||||
*
|
||||
* @param BrokenExternalPageTrack $pageTrack
|
||||
* @param DOMNode $link
|
||||
*/
|
||||
protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) {
|
||||
$class = $link->getAttribute('class');
|
||||
$href = $link->getAttribute('href');
|
||||
$markedBroken = preg_match('/\b(ss-broken)\b/', $class);
|
||||
|
||||
// Check link
|
||||
$httpCode = $this->linkChecker->checkLink($href);
|
||||
if($httpCode === null) return; // Null link means uncheckable, such as an internal link
|
||||
|
||||
// If this code is broken then mark as such
|
||||
if($foundBroken = $this->isCodeBroken($httpCode)) {
|
||||
// Create broken record
|
||||
$brokenLink = new BrokenExternalLink();
|
||||
$brokenLink->Link = $href;
|
||||
$brokenLink->HTTPCode = $httpCode;
|
||||
$brokenLink->TrackID = $pageTrack->ID;
|
||||
$brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons
|
||||
$brokenLink->write();
|
||||
}
|
||||
|
||||
// Check if we need to update CSS class, otherwise return
|
||||
if($markedBroken == $foundBroken) return;
|
||||
if($foundBroken) {
|
||||
$class .= ' ss-broken';
|
||||
} else {
|
||||
$class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class);
|
||||
}
|
||||
$link->setAttribute('class', trim($class));
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if the given HTTP code is "broken"
|
||||
*
|
||||
* @param int $httpCode
|
||||
* @return bool True if this is a broken code
|
||||
*/
|
||||
protected function isCodeBroken($httpCode) {
|
||||
// Null represents no request attempted
|
||||
if($httpCode === null) return false;
|
||||
|
||||
// do we have any whitelisted codes
|
||||
$ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes');
|
||||
if(is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) return false;
|
||||
|
||||
// Check if code is outside valid range
|
||||
return $httpCode < 200 || $httpCode > 302;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the links checker and returns the track used
|
||||
*
|
||||
* @param int $limit Limit to number of pages to run, or null to run all
|
||||
* @return BrokenExternalPageTrackStatus
|
||||
*/
|
||||
public function runLinksCheck($limit = null) {
|
||||
// Check the current status
|
||||
$status = BrokenExternalPageTrackStatus::get_or_create();
|
||||
|
||||
// Calculate pages to run
|
||||
$pageTracks = $status->getIncompleteTracks();
|
||||
if($limit) $pageTracks = $pageTracks->limit($limit);
|
||||
|
||||
// Check each page
|
||||
foreach ($pageTracks as $pageTrack) {
|
||||
// Flag as complete
|
||||
$pageTrack->Processed = 1;
|
||||
$pageTrack->write();
|
||||
|
||||
// Check value of html area
|
||||
$page = $pageTrack->Page();
|
||||
$this->log("Checking {$page->Title}");
|
||||
$htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
|
||||
if (!$htmlValue->isValid()) continue;
|
||||
|
||||
// Check each link
|
||||
$links = $htmlValue->getElementsByTagName('a');
|
||||
foreach($links as $link) {
|
||||
$this->checkPageLink($pageTrack, $link);
|
||||
}
|
||||
|
||||
// Update content of page based on link fixes / breakages
|
||||
$htmlValue->saveHTML();
|
||||
$page->Content = $htmlValue->getContent();
|
||||
$page->write();
|
||||
|
||||
// Once all links have been created for this page update HasBrokenLinks
|
||||
$count = $pageTrack->BrokenLinks()->count();
|
||||
$this->log("Found {$count} broken links");
|
||||
if($count) {
|
||||
// Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true
|
||||
DB::query(sprintf(
|
||||
'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'',
|
||||
intval($pageTrack->ID)
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
$status->updateJobInfo('Updating completed pages');
|
||||
$status->updateStatus();
|
||||
return $status;
|
||||
}
|
||||
|
||||
public static function getLatestTrack() {
|
||||
return BrokenExternalPageTrackStatus::get_latest();
|
||||
}
|
||||
|
||||
public static function getLatestTrackID() {
|
||||
$track = BrokenExternalPageTrackStatus::get_latest();
|
||||
return $track ? $track->ID : null;
|
||||
}
|
||||
|
||||
public static function getLatestTrackStatus() {
|
||||
$track = BrokenExternalPageTrackStatus::get_latest();
|
||||
return $track ? $track->Status : null;
|
||||
}
|
||||
|
||||
private function updateCompletedPages($trackID = 0) {
|
||||
$noPages = BrokenExternalPageTrack::get()
|
||||
->filter(array(
|
||||
'TrackID' => $trackID,
|
||||
'Processed' => 1
|
||||
))
|
||||
->count();
|
||||
$track = $this->getLatestTrack($trackID);
|
||||
$track->CompletedPages = $noPages;
|
||||
$track->write();
|
||||
return $noPages;
|
||||
}
|
||||
|
||||
private function updateJobInfo($message) {
|
||||
$track = BrokenExternalPageTrackStatus::get_latest();
|
||||
if($track) {
|
||||
$track->JobInfo = $message;
|
||||
$track->write();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Check links using curl
|
||||
*/
|
||||
class CurlLinkChecker implements LinkChecker {
|
||||
|
||||
/**
|
||||
* Return cache
|
||||
*
|
||||
* @return Zend_Cache_Frontend
|
||||
*/
|
||||
protected function getCache() {
|
||||
return SS_Cache::factory(
|
||||
__CLASS__,
|
||||
'Output',
|
||||
array('automatic_serialization' => true)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the http status code for a given link
|
||||
*
|
||||
* @param string $href URL to check
|
||||
* @return int HTTP status code, or null if not checkable (not a link)
|
||||
*/
|
||||
protected function checkLink($href) {
|
||||
// Skip non-external links
|
||||
if(!preg_match('/^https?[^:]*:\/\//', $href)) return null;
|
||||
|
||||
// Check if we have a cached result
|
||||
$cacheKey = md5($href);
|
||||
$result = $this->getCache()->load($cacheKey);
|
||||
if($result !== false) return $result;
|
||||
|
||||
// No cached result so just request
|
||||
$handle = curl_init($href);
|
||||
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
|
||||
curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
|
||||
curl_setopt($handle, CURLOPT_TIMEOUT, 10);
|
||||
curl_exec($handle);
|
||||
$httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
|
||||
curl_close($handle);
|
||||
|
||||
// Cache result
|
||||
$this->getCache()->save($httpCode, $cacheKey);
|
||||
return $httpCode;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Provides an interface for checking that a link is valid
|
||||
*/
|
||||
interface LinkChecker {
|
||||
|
||||
/**
|
||||
* Determine the http status code for a given link
|
||||
*
|
||||
* @param string $href URL to check
|
||||
* @return int HTTP status code, or null if not checkable (not a link)
|
||||
*/
|
||||
public function checkLink($href);
|
||||
}
|
|
@ -9,11 +9,14 @@
|
|||
"email": "kirk@silverstripe.com"
|
||||
}
|
||||
],
|
||||
"require":
|
||||
{
|
||||
"require": {
|
||||
"silverstripe/framework": ">=3.0",
|
||||
"silverstripe/cms": ">=3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"hafriedlander/silverstripe-phockito": "*",
|
||||
"phpunit/PHPUnit": "~3.7@stable"
|
||||
},
|
||||
"suggest": {
|
||||
"silverstripe/queuedjobs": "Speeds up running the job for Content Editors fropm the report"
|
||||
}
|
||||
|
|
|
@ -1,57 +1,63 @@
|
|||
(function($) {
|
||||
$('#externalLinksReport').entwine({
|
||||
onclick: function() {
|
||||
$(this).start();
|
||||
},
|
||||
onmatch: function() {
|
||||
// poll the current job and update the front end status
|
||||
$('#externalLinksReport').hide();
|
||||
$(this).poll(0);
|
||||
},
|
||||
start: function() {
|
||||
// initiate a new job
|
||||
$('#ReportHolder').empty();
|
||||
$('#ReportHolder').text('Running report 0%');
|
||||
$('#ReportHolder').append('<span class="ss-ui-loading-icon"></span>');
|
||||
$('#externalLinksReport').hide();
|
||||
$.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 });
|
||||
$(this).poll(1);
|
||||
},
|
||||
poll: function(start) {
|
||||
$.ajax({
|
||||
url: "admin/externallinks/getJobStatus",
|
||||
async: true,
|
||||
success: function(data) {
|
||||
var obj = $.parseJSON(data);
|
||||
|
||||
// No report, so let user create one
|
||||
if (!obj) {
|
||||
$('#externalLinksReport').show();
|
||||
return;
|
||||
}
|
||||
var completed = obj.Completed ? obj.Completed : 0;
|
||||
var total = obj.Total ? obj.Total : 0;
|
||||
var jobStatus = obj.Status ? obj.Status : 'Running';
|
||||
if (jobStatus == 'Completed' && start == 0) {
|
||||
$('#ReportHolder').text('Report Finished ' + completed + '/' + total);
|
||||
$('#externalLinksReport').show();
|
||||
} else {
|
||||
setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000);
|
||||
}
|
||||
if (total && completed) {
|
||||
$.entwine('ss', function($) {
|
||||
$('#externalLinksReport').entwine({
|
||||
onclick: function() {
|
||||
$(this).start();
|
||||
},
|
||||
onmatch: function() {
|
||||
// poll the current job and update the front end status
|
||||
$('#externalLinksReport').hide();
|
||||
$(this).poll(0);
|
||||
},
|
||||
start: function() {
|
||||
// initiate a new job
|
||||
$('#ReportHolder').empty();
|
||||
$('#ReportHolder').text('Running report 0%');
|
||||
$('#ReportHolder').append('<span class="ss-ui-loading-icon"></span>');
|
||||
$('#externalLinksReport').hide();
|
||||
$.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 });
|
||||
$(this).poll(1);
|
||||
},
|
||||
poll: function(start) {
|
||||
$.ajax({
|
||||
url: "admin/externallinks/getJobStatus",
|
||||
async: true,
|
||||
success: function(data) {
|
||||
// No report, so let user create one
|
||||
if (!data) {
|
||||
$('#externalLinksReport').show();
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse data
|
||||
var completed = data.Completed ? data.Completed : 0;
|
||||
var total = data.Total ? data.Total : 0;
|
||||
|
||||
// If complete status
|
||||
if (data.Status === 'Completed') {
|
||||
$('#ReportHolder').text('Report Finished ' + completed + '/' + total);
|
||||
$('#externalLinksReport').show();
|
||||
return;
|
||||
}
|
||||
|
||||
// If incomplete update status
|
||||
if (completed < total) {
|
||||
var percent = (completed / total) * 100;
|
||||
$('#ReportHolder').text('Running report ' + completed + '/' +
|
||||
total + ' (' + percent.toFixed(2) + '%)');
|
||||
$('#ReportHolder').
|
||||
append('<span class="ss-ui-loading-icon"></span>');
|
||||
}
|
||||
$('#ReportHolder')
|
||||
.text('Running report ' + completed + '/' + total + ' (' + percent.toFixed(2) + '%)')
|
||||
.append('<span class="ss-ui-loading-icon"></span>');
|
||||
}
|
||||
|
||||
// Ensure the regular poll method is run
|
||||
if(!start) {
|
||||
setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000);
|
||||
}
|
||||
},
|
||||
error: function(e) {
|
||||
if(typeof console !== 'undefined') console.log(e);
|
||||
}
|
||||
},
|
||||
error: function(e) {
|
||||
if(typeof console !== 'undefined') console.log(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
}(jQuery));
|
||||
|
|
|
@ -1,32 +1,129 @@
|
|||
<?php
|
||||
|
||||
class ExternalLinks extends FunctionalTest {
|
||||
if (class_exists('Phockito')) Phockito::include_hamcrest();
|
||||
|
||||
class ExternalLinksTest extends SapphireTest {
|
||||
|
||||
protected static $fixture_file = 'ExternalLinksTest.yml';
|
||||
|
||||
protected $extraDataObjects = array(
|
||||
'ExternalLinksTest_Page'
|
||||
);
|
||||
|
||||
public function setUp() {
|
||||
parent::setUp();
|
||||
|
||||
Injector::nest();
|
||||
|
||||
// Check dependencies
|
||||
if (!class_exists('Phockito')) {
|
||||
$this->skipTest = true;
|
||||
return $this->markTestSkipped("These tests need the Phockito module installed to run");
|
||||
}
|
||||
|
||||
// Mock link checker
|
||||
$checker = Phockito::mock('LinkChecker');
|
||||
Phockito::when($checker)
|
||||
->checkLink('http://www.working.com')
|
||||
->return(200);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('http://www.broken.com/url/thing') // 404 on working site
|
||||
->return(404);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('http://www.broken.com') // 403 on working site
|
||||
->return(403);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('http://www.nodomain.com') // no ping
|
||||
->return(0);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('/internal/link')
|
||||
->return(null);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('[sitetree_link,id=9999]')
|
||||
->return(null);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('home')
|
||||
->return(null);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('broken-internal')
|
||||
->return(null);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink('[sitetree_link,id=1]')
|
||||
->return(null);
|
||||
|
||||
Phockito::when($checker)
|
||||
->checkLink(anything()) // anything else is 404
|
||||
->return(404);
|
||||
|
||||
Injector::inst()->registerService($checker, 'LinkChecker');
|
||||
}
|
||||
|
||||
public function tearDown() {
|
||||
Injector::unnest();
|
||||
parent::tearDown();
|
||||
}
|
||||
|
||||
public function testLinks() {
|
||||
// uses http://127.0.0.1 to test a working link
|
||||
$working = $this->objFromFixture('SiteTree', 'working');
|
||||
$working->write();
|
||||
$task = new CheckExternalLinks();
|
||||
$task->run(null);
|
||||
$brokenLinks = BrokenExternalLink::get()->column('Link');;
|
||||
// confirm the working link has not been added as a broken link
|
||||
$this->assertNotEquals($working->Link, $brokenLinks[0]);
|
||||
}
|
||||
|
||||
public function testBrokenLink() {
|
||||
// uses http://192.0.2.1 for a broken link
|
||||
$broken = $this->objFromFixture('SiteTree', 'broken');
|
||||
$broken->write();
|
||||
$task = new CheckExternalLinks();
|
||||
$task->run(null);
|
||||
$brokenLinks = BrokenExternalLink::get();
|
||||
$this->assertEquals(1, $brokenLinks->count());
|
||||
// Run link checker
|
||||
$task = CheckExternalLinksTask::create();
|
||||
$task->setSilent(true); // Be quiet during the test!
|
||||
$task->runLinksCheck();
|
||||
|
||||
// Get all links checked
|
||||
$status = BrokenExternalPageTrackStatus::get_latest();
|
||||
$this->assertEquals('Completed', $status->Status);
|
||||
$this->assertEquals(5, $status->TotalPages);
|
||||
$this->assertEquals(5, $status->CompletedPages);
|
||||
|
||||
// Check all pages have had the correct HTML adjusted
|
||||
for($i = 1; $i <= 5; $i++) {
|
||||
$page = $this->objFromFixture('ExternalLinksTest_Page', 'page'.$i);
|
||||
$this->assertNotEmpty($page->Content);
|
||||
$this->assertEquals(
|
||||
$page->ExpectedContent,
|
||||
$page->Content,
|
||||
"Assert that the content of page{$i} has been updated"
|
||||
);
|
||||
}
|
||||
|
||||
// Check that the correct report of broken links is generated
|
||||
$links = $status
|
||||
->BrokenLinks()
|
||||
->sort('Link');
|
||||
|
||||
$this->assertEquals(4, $links->count());
|
||||
$this->assertEquals(
|
||||
array(
|
||||
'http://www.broken.com',
|
||||
'http://www.broken.com/url/thing',
|
||||
'http://www.broken.com/url/thing',
|
||||
'http://www.nodomain.com'
|
||||
),
|
||||
array_values($links->map('ID', 'Link')->toArray())
|
||||
);
|
||||
|
||||
// Check response codes are correct
|
||||
$expected = array(
|
||||
'http://www.broken.com' => 403,
|
||||
'http://www.broken.com/url/thing' => 404,
|
||||
'http://www.nodomain.com' => 0
|
||||
);
|
||||
$actual = $links->map('Link', 'HTTPCode')->toArray();
|
||||
$this->assertEquals($expected, $actual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that broken links appears in the reports list
|
||||
*/
|
||||
public function testReportExists() {
|
||||
$mock = $this->objFromFixture('SiteTree', 'broken');
|
||||
$reports = SS_Report::get_reports();
|
||||
$reportNames = array();
|
||||
foreach($reports as $report) {
|
||||
|
@ -37,3 +134,8 @@ class ExternalLinks extends FunctionalTest {
|
|||
}
|
||||
}
|
||||
|
||||
class ExternalLinksTest_Page extends Page implements TestOnly {
|
||||
private static $db = array(
|
||||
'ExpectedContent' => 'HTMLText'
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,56 @@
|
|||
SiteTree:
|
||||
working:
|
||||
Title: Working Link
|
||||
Content: '<a href="http://127.0.0.1">Localhost</a>'
|
||||
broken:
|
||||
Title: Broken Link
|
||||
Content: '<a href="http://192.0.2.1">Broken</a>'
|
||||
ExternalLinksTest_Page:
|
||||
# Tests mix of broken and working external links
|
||||
page1:
|
||||
Title: 'Page 1'
|
||||
Content: >
|
||||
<h1>Links</h1>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
<h2>Other Links</h2>
|
||||
<a href="http://www.broken.com">but this isn't</a>
|
||||
ExpectedContent: >
|
||||
<h1>Links</h1>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
<h2>Other Links</h2>
|
||||
<a href="http://www.broken.com" class="ss-broken">but this isn't</a>
|
||||
# Tests broken external link staying broken
|
||||
page2:
|
||||
Title: 'Page 2'
|
||||
Content: >
|
||||
<p><a href="http://www.broken.com/url/thing" class="ss-broken">Still Broken</a></p>
|
||||
ExpectedContent: >
|
||||
<p><a href="http://www.broken.com/url/thing" class="ss-broken">Still Broken</a></p>
|
||||
# Tests internal broken links not marking a page as broken
|
||||
page3:
|
||||
Title: 'Page 3'
|
||||
Content: >
|
||||
<h1>Links</h1>
|
||||
<a href="home">Home page</a>
|
||||
<a href="broken-internal" class="ss-broken">Broken internal page</a>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
ExpectedContent: >
|
||||
<h1>Links</h1>
|
||||
<a href="home">Home page</a>
|
||||
<a href="broken-internal" class="ss-broken">Broken internal page</a>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
# Tests httpcode = 0
|
||||
page4:
|
||||
Title: 'Page 4'
|
||||
Content: >
|
||||
<a href="http://www.nodomain.com">This shouldn't even have a HTTP response</a>
|
||||
<a href="[sitetree_link,id=9999]" class="ss-broken">Another Link</a>
|
||||
<p><a href="http://www.broken.com/url/thing">Copied from another page</a></p>
|
||||
ExpectedContent: >
|
||||
<a href="http://www.nodomain.com" class="ss-broken">This shouldn't even have a HTTP response</a>
|
||||
<a href="[sitetree_link,id=9999]" class="ss-broken">Another Link</a>
|
||||
<p><a href="http://www.broken.com/url/thing" class="ss-broken">Copied from another page</a></p>
|
||||
# Test page with no broken links
|
||||
page5:
|
||||
Title: 'Page 5'
|
||||
Content: >
|
||||
<a href="/internal/link">Internal Link</a>
|
||||
<a href="[sitetree_link,id=1]">Another Link</a>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
ExpectedContent: >
|
||||
<a href="/internal/link">Internal Link</a>
|
||||
<a href="[sitetree_link,id=1]">Another Link</a>
|
||||
<a href="http://www.working.com">This is a working site</a>
|
||||
|
|
Loading…
Reference in New Issue