From 3b976fd408bec9e81524c7582d484fc6dfcb1115 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Wed, 23 Jul 2014 09:57:10 +1200 Subject: [PATCH] NEW: Adding link tracking css class and using built in reporting --- README.md | 6 --- code/jobs/CheckExternalLinksJob.php | 8 +++- code/model/BrokenExternalLinks.php | 44 ------------------ code/tasks/CheckExternalLinks.php | 69 ++++++++++++++++++----------- 4 files changed, 50 insertions(+), 77 deletions(-) delete mode 100644 code/model/BrokenExternalLinks.php diff --git a/README.md b/README.md index f939d47..2196e70 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,6 @@ Add the following code to the mysite config to run the job every 24 hours (86400 `Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);` -## Disable the Broken external link menu - -To disable the *Broken Ext. Links* menu add the following code to mysite/_config.php - -`CMSMenu::remove_menu_item('BrokenExternalLinksAdmin');` - ## TODO ## Fix setting the class attribute of broken links to ss-broken diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 20ee9c6..6c9c34b 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -9,7 +9,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob { public static $regenerate_time = 43200; public function __construct() { - $this->pagesToProcess = DB::query('SELECT "ID" FROM "SiteTree_Live" WHERE "ShowInSearch"=1')->column(); + $this->pagesToProcess = SiteTree::get(); $this->currentStep = 0; $this->totalSteps = count($this->pagesToProcess); } @@ -49,7 +49,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob { $restart = $this->currentStep == 0; if ($restart) { - $this->pagesToProcess = DB::query('SELECT "ID" FROM SiteTree_Live WHERE ShowInSearch=1')->column(); + $this->pagesToProcess = SiteTree::get(); } } @@ -63,6 +63,10 @@ class CheckExternalLinksJob extends AbstractQueuedJob { public function process() { $task = new CheckExternalLinks(); $task->run(); + $data = $this->getJobData(); + $completedPages = $task->getCompletedPages(); + $totalPages = $task->getTotalPages(); + $this->addMessage("$completedPages/$totalPages pages completed"); $this->completeJob(); } diff --git a/code/model/BrokenExternalLinks.php b/code/model/BrokenExternalLinks.php deleted file mode 100644 index 4e73c1d..0000000 --- a/code/model/BrokenExternalLinks.php +++ /dev/null @@ -1,44 +0,0 @@ - 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. - 'HTTPCode' =>'Int' - ); - - private static $has_one = array( - 'Page' => 'Page' - ); - - public static $summary_fields = array( - 'Page.Title' => 'Page', - 'HTTPCode' => 'HTTP Code', - 'Created' => 'Created' - ); - - public static $searchable_fields = array( - 'HTTPCode' => array('title' => 'HTTP Code') - ); - - function canEdit($member = false) { - return false; - } - -} - -class BrokenExternalLinksAdmin extends ModelAdmin { - - public static $url_segment = 'broken-external-links-admin'; - - public static $managed_models = array( - 'BrokenExternalLinks' - ); - - public static $menu_title = 'Broken Ext. links'; - - public function init() { - parent::init(); - } - -} diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 1232acf..830bb54 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -7,26 +7,46 @@ class CheckExternalLinks extends BuildTask { protected $enabled = true; + private $completedPages; + private $totalPages; + + public function getCompletedPages() { + return $this->completedPages; + } + + public function getTotalPages() { + return $this->totalPages; + } + function run($request) { - // clear broken external link table - $table = 'BrokenExternalLinks'; - if(method_exists(DB::getConn(), 'clearTable')) DB::getConn()->clearTable($table); - else DB::query("TRUNCATE \"$table\""); - $pages = SiteTree::get(); + $pages = Versioned::get_by_stage('SiteTree', 'Live'); foreach ($pages as $page) { + ++$this->totalPages; + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) { + continue; + } // Populate link tracking for internal links & links to asset files. if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { + $class = $link->getAttribute('class'); + $pos = stripos($class, 'ss-broken'); + if ($pos !== false && $page->HasBrokenLink == 1) continue; + $href = Director::makeRelative($link->getAttribute('href')); if ($href == 'admin/') continue; - // ignore SiteTree and assets links as they will be caught by SiteTreeLinkTracking - if(preg_match('/\[sitetree_link,id=([0-9]+)\]/i', $href, $matches)) { + // ignore SiteTree, anchor and assets links as they will be caught + // by SiteTreeLinkTracking + if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) { + continue; + } else if (isset($href[0]) && $href[0] == '#') { continue; } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { continue; } + if($href && function_exists('curl_init')) { $handle = curl_init($href); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); @@ -36,31 +56,30 @@ class CheckExternalLinks extends BuildTask { if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { - $brokenLink = new BrokenExternalLinks(); - $brokenLink->PageID = $page->ID; - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->write(); + // set the broken link class + $class = ($class && stripos($class, 'ss-broken')) ? + $class . ' ss-broken' : 'ss-broken'; + $link->setAttribute('class', ($class ? $class : 'ss-broken')); + $htmlValue->__call('saveHTML', array()); - // TODO set the broken link class - /* - $class = $link->getAttribute('class'); - $class = ($class) ? $class . 'ss-broken' : 'ss-broken'; - $link->setAttribute('class', ($class ? "$class ss-broken" : 'ss-broken')); - */ + $page->Content = $htmlValue->getContent(); + $page->write(); - // use raw sql query to set broken link as calling the dataobject write - // method will reset the links if no broken internal links are found - $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; - $query .= "WHERE \"ID\" = " . (int)$page->ID; - $result = DB::query($query); - if (!$result) { - // error updating hasBrokenLink + if (!$page->HasBrokenLink) { + // bypass the ORM as syncLinkTracking does not allow you + // to update HasBrokenLink to true + $query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 "; + $query .= "WHERE \"ID\" = " . (int)$page->ID; + $result = DB::query($query); + if (!$result) { + $this->debugMessage('Error updating HasBrokenLink'); + } } } } } + ++$this->completedPages; } // run this again if queued jobs exists and is a valid int