pageToProcess)) { $pages = $this->pageToProcess; } else { if ($trackID) { $result = BrokenExternalPageTrack::get() ->filter('TrackID', $trackID); $pages = Versioned::get_by_stage('SiteTree', 'Live') ->exclude('ID', $result->column('PageID')) ->limit(10); } else { $pages = Versioned::get_by_stage('SiteTree', 'Live'); } } foreach ($pages as $page) { ++$this->totalPages; $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); if (!$htmlValue->isValid()) { continue; } // Populate link tracking for internal links & links to asset files. if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { $class = $link->getAttribute('class'); $pos = stripos($class, 'ss-broken'); if ($pos !== false && $page->HasBrokenLink == 1) continue; $href = Director::makeRelative($link->getAttribute('href')); if ($href == 'admin/') continue; // ignore SiteTree, anchor and assets links as they will be caught // by SiteTreeLinkTracking if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) { continue; } else if (isset($href[0]) && $href[0] == '#') { continue; } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { continue; } if($href && function_exists('curl_init')) { $handle = curl_init($href); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); curl_setopt($handle, CURLOPT_TIMEOUT, 10); $response = curl_exec($handle); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); curl_close($handle); // do we have any whitelisted codes $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); // if the code is whitelisted set it to 200 $httpCode = (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) ? 200 : $httpCode; // ignore empty hrefs and internal links if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { $brokenLink = new BrokenExternalLink(); $brokenLink->PageID = $page->ID; $brokenLink->Link = $href; $brokenLink->HTTPCode = $httpCode; $brokenLink->write(); // set the broken link class $class = ($class && stripos($class, 'ss-broken')) ? $class . ' ss-broken' : 'ss-broken'; $link->setAttribute('class', ($class ? $class : 'ss-broken')); $htmlValue->__call('saveHTML', array()); $page->Content = $htmlValue->getContent(); $page->owner->write(); if (!$page->HasBrokenLink) { // bypass the ORM as syncLinkTracking does not allow you // to update HasBrokenLink to true $query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 "; $query .= "WHERE \"ID\" = " . (int)$page->ID; $result = DB::query($query); if (!$result) { $this->debugMessage('Error updating HasBrokenLink'); } } } } } ++$this->completedPages; if ($trackID) { $trackPage = new BrokenExternalPageTrack(); $trackPage->PageID = $page->ID; $trackPage->TrackID = $trackID; $trackPage->write(); } } // run this again if queued jobs exists and is a valid int $queuedJob = Config::inst()->get('CheckExternalLinks', 'Delay'); if (isset($queuedJob) && is_int($queuedJob) && class_exists('QueuedJobService')) { $checkLinks = new CheckExternalLinksJob(); singleton('QueuedJobService') ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + $queuedJob)); } } }