NEW: Adding link tracking CSS class and using built-in reporting

This commit is contained in:
Kirk Mayo 2014-07-23 09:57:10 +12:00
parent 9708dfd074
commit 3b976fd408
4 changed files with 50 additions and 77 deletions

View File

@ -38,12 +38,6 @@ Add the following code to the mysite config to run the job every 24 hours (86400
`Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);` `Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);`
## Disable the Broken external link menu
To disable the *Broken Ext. Links* menu, add the following code to `mysite/_config.php`:
`CMSMenu::remove_menu_item('BrokenExternalLinksAdmin');`
## TODO ## ## TODO ##
Fix setting the class attribute of broken links to ss-broken Fix setting the class attribute of broken links to ss-broken

View File

@ -9,7 +9,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob {
public static $regenerate_time = 43200; public static $regenerate_time = 43200;
public function __construct() { public function __construct() {
$this->pagesToProcess = DB::query('SELECT "ID" FROM "SiteTree_Live" WHERE "ShowInSearch"=1')->column(); $this->pagesToProcess = SiteTree::get();
$this->currentStep = 0; $this->currentStep = 0;
$this->totalSteps = count($this->pagesToProcess); $this->totalSteps = count($this->pagesToProcess);
} }
@ -49,7 +49,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob {
$restart = $this->currentStep == 0; $restart = $this->currentStep == 0;
if ($restart) { if ($restart) {
$this->pagesToProcess = DB::query('SELECT "ID" FROM SiteTree_Live WHERE ShowInSearch=1')->column(); $this->pagesToProcess = SiteTree::get();
} }
} }
@ -63,6 +63,10 @@ class CheckExternalLinksJob extends AbstractQueuedJob {
/**
 * Runs the external link check once and records its progress on the job.
 *
 * Executes the CheckExternalLinks task, adds a
 * "<completed>/<total> pages completed" message to the job and then
 * marks the job as complete.
 */
public function process() {
    $task = new CheckExternalLinks();
    // CheckExternalLinks::run() declares a required $request parameter.
    // No HTTP request exists inside a queued job, so pass null explicitly
    // rather than omitting the argument.
    // NOTE(review): assumes run() never dereferences $request - confirm.
    $task->run(null);

    // NOTE(review): $data is not used below; the call is kept in case
    // getJobData() has side effects - confirm before removing.
    $data = $this->getJobData();

    $completedPages = $task->getCompletedPages();
    $totalPages = $task->getTotalPages();
    $this->addMessage("$completedPages/$totalPages pages completed");
    $this->completeJob();
}

View File

@ -1,44 +0,0 @@
<?php
/**
 * Record of an external link that was reported as broken.
 *
 * Holds the link URL and the HTTP status code received for it, plus a
 * relation to the page the link was found on. Records are read-only:
 * canEdit() always returns false.
 */
class BrokenExternalLinks extends DataObject {

    /**
     * Database columns for the record.
     *
     * @var array
     */
    private static $db = array(
        // 2083 is the maximum length of a URL in Internet Explorer.
        'Link' => 'Varchar(2083)',
        'HTTPCode' =>'Int'
    );

    /**
     * Page that contains the broken link.
     *
     * @var array
     */
    private static $has_one = array(
        'Page' => 'Page'
    );

    /**
     * Fields that can be searched on, mapped to their display settings.
     *
     * @var array
     */
    public static $searchable_fields = array(
        'HTTPCode' => array('title' => 'HTTP Code')
    );

    /**
     * Columns shown in summary listings, mapped to their column titles.
     *
     * @var array
     */
    public static $summary_fields = array(
        'Page.Title' => 'Page',
        'HTTPCode' => 'HTTP Code',
        'Created' => 'Created'
    );

    /**
     * Editing is never allowed for these records.
     *
     * @param mixed $member Ignored; the answer is the same for everyone.
     * @return bool Always false.
     */
    public function canEdit($member = false) {
        return false;
    }
}
/**
 * ModelAdmin section that manages BrokenExternalLinks records.
 */
class BrokenExternalLinksAdmin extends ModelAdmin {

    /**
     * Title used for this section's menu entry.
     *
     * @var string
     */
    public static $menu_title = 'Broken Ext. links';

    /**
     * URL segment for this admin section.
     *
     * @var string
     */
    public static $url_segment = 'broken-external-links-admin';

    /**
     * Model classes managed by this admin section.
     *
     * @var array
     */
    public static $managed_models = array(
        'BrokenExternalLinks'
    );

    /**
     * Pass-through initialisation: only delegates to the parent class.
     */
    public function init() {
        parent::init();
    }
}

View File

@ -7,26 +7,46 @@ class CheckExternalLinks extends BuildTask {
protected $enabled = true; protected $enabled = true;
/**
 * Number of pages run() has finished checking.
 *
 * Starts at 0 so the getter returns an integer even before run() has
 * processed anything. run() increments it at the end of each page
 * iteration; pages skipped because their HTML is invalid are not counted.
 *
 * @var int
 */
private $completedPages = 0;

/**
 * Number of pages run() has started iterating over.
 *
 * Starts at 0 for the same reason as $completedPages. run() increments
 * it at the start of each page iteration.
 *
 * @var int
 */
private $totalPages = 0;

/**
 * @return int Pages fully checked so far (0 if nothing has been processed).
 */
public function getCompletedPages() {
    return $this->completedPages;
}

/**
 * @return int Pages visited so far (0 if nothing has been processed).
 */
public function getTotalPages() {
    return $this->totalPages;
}
function run($request) { function run($request) {
// clear broken external link table $pages = Versioned::get_by_stage('SiteTree', 'Live');
$table = 'BrokenExternalLinks';
if(method_exists(DB::getConn(), 'clearTable')) DB::getConn()->clearTable($table);
else DB::query("TRUNCATE \"$table\"");
$pages = SiteTree::get();
foreach ($pages as $page) { foreach ($pages as $page) {
++$this->totalPages;
$htmlValue = Injector::inst()->create('HTMLValue', $page->Content); $htmlValue = Injector::inst()->create('HTMLValue', $page->Content);
if (!$htmlValue->isValid()) {
continue;
}
// Populate link tracking for internal links & links to asset files. // Populate link tracking for internal links & links to asset files.
if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) {
$class = $link->getAttribute('class');
$pos = stripos($class, 'ss-broken');
if ($pos !== false && $page->HasBrokenLink == 1) continue;
$href = Director::makeRelative($link->getAttribute('href')); $href = Director::makeRelative($link->getAttribute('href'));
if ($href == 'admin/') continue; if ($href == 'admin/') continue;
// ignore SiteTree and assets links as they will be caught by SiteTreeLinkTracking // ignore SiteTree, anchor and assets links as they will be caught
if(preg_match('/\[sitetree_link,id=([0-9]+)\]/i', $href, $matches)) { // by SiteTreeLinkTracking
if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) {
continue;
} else if (isset($href[0]) && $href[0] == '#') {
continue; continue;
} else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') {
continue; continue;
} }
if($href && function_exists('curl_init')) { if($href && function_exists('curl_init')) {
$handle = curl_init($href); $handle = curl_init($href);
curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
@ -36,31 +56,30 @@ class CheckExternalLinks extends BuildTask {
if (($httpCode < 200 || $httpCode > 302) if (($httpCode < 200 || $httpCode > 302)
|| ($href == '' || $href[0] == '/')) || ($href == '' || $href[0] == '/'))
{ {
$brokenLink = new BrokenExternalLinks(); // set the broken link class
$brokenLink->PageID = $page->ID; $class = ($class && stripos($class, 'ss-broken')) ?
$brokenLink->Link = $href; $class . ' ss-broken' : 'ss-broken';
$brokenLink->HTTPCode = $httpCode; $link->setAttribute('class', ($class ? $class : 'ss-broken'));
$brokenLink->write(); $htmlValue->__call('saveHTML', array());
// TODO set the broken link class $page->Content = $htmlValue->getContent();
/* $page->write();
$class = $link->getAttribute('class');
$class = ($class) ? $class . 'ss-broken' : 'ss-broken';
$link->setAttribute('class', ($class ? "$class ss-broken" : 'ss-broken'));
*/
// use raw sql query to set broken link as calling the dataobject write if (!$page->HasBrokenLink) {
// method will reset the links if no broken internal links are found // bypass the ORM as syncLinkTracking does not allow you
$query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; // to update HasBrokenLink to true
$query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 ";
$query .= "WHERE \"ID\" = " . (int)$page->ID; $query .= "WHERE \"ID\" = " . (int)$page->ID;
$result = DB::query($query); $result = DB::query($query);
if (!$result) { if (!$result) {
// error updating hasBrokenLink $this->debugMessage('Error updating HasBrokenLink');
}
} }
} }
} }
} }
++$this->completedPages;
} }
// run this again if queued jobs exists and is a valid int // run this again if queued jobs exists and is a valid int