2013-08-07 01:03:15 +02:00
|
|
|
<?php
|
|
|
|
/**
|
2014-08-12 06:39:11 +02:00
|
|
|
* Adds tracking of links in any HTMLText fields which reference SiteTree or File items. Attaching this to any
|
|
|
|
* DataObject will add four fields which contain all links to SiteTree and File items referenced in any HTMLText fields,
|
|
|
|
* and two booleans to indicate if there are any broken links.
|
|
|
|
*
|
|
|
|
* SiteTreeLinkTracking provides augmentSyncLinkTracking as an entry point for the tracking updater.
|
2013-08-07 01:03:15 +02:00
|
|
|
*
|
2014-08-12 06:39:11 +02:00
|
|
|
* Additionally, a SiteTreeLinkTracking_Highlighter extension is provided which, when applied to HtmlEditorField,
|
|
|
|
* will reuse the link SiteTreeLinkTracking's parser to add "ss-broken" classes to all broken links found this way.
|
|
|
|
* The resulting class will be saved to the Content on the subsequent write operation. If links are found to be
|
|
|
|
* no longer broken, the class will be removed on the next write.
|
2013-08-07 01:03:15 +02:00
|
|
|
*
|
2014-08-12 06:39:11 +02:00
|
|
|
* The underlying SiteTreeLinkTracking_Parser can recognise broken internal links, broken internal anchors, and some
|
|
|
|
* typical broken links such as empty href, or a link starting with a slash.
|
2013-08-07 01:03:15 +02:00
|
|
|
*/
|
|
|
|
class SiteTreeLinkTracking extends DataExtension {
|
|
|
|
|
2014-08-12 06:39:11 +02:00
|
|
|
public $parser;
|
|
|
|
|
|
|
|
private static $dependencies = array(
|
|
|
|
'parser' => '%$SiteTreeLinkTracking_Parser'
|
|
|
|
);
|
|
|
|
|
2013-08-07 01:03:15 +02:00
|
|
|
private static $db = array(
|
|
|
|
"HasBrokenFile" => "Boolean",
|
|
|
|
"HasBrokenLink" => "Boolean"
|
|
|
|
);
|
|
|
|
|
|
|
|
private static $many_many = array(
|
|
|
|
"LinkTracking" => "SiteTree",
|
|
|
|
"ImageTracking" => "File"
|
|
|
|
);
|
|
|
|
|
|
|
|
private static $many_many_extraFields = array(
|
|
|
|
"LinkTracking" => array("FieldName" => "Varchar"),
|
|
|
|
"ImageTracking" => array("FieldName" => "Varchar")
|
|
|
|
);
|
|
|
|
|
2014-08-15 02:04:48 +02:00
|
|
|
public function trackLinksInField($fieldName) {
|
2013-08-07 01:03:15 +02:00
|
|
|
$record = $this->owner;
|
|
|
|
|
|
|
|
$linkedPages = array();
|
|
|
|
$linkedFiles = array();
|
|
|
|
|
2014-08-15 02:04:48 +02:00
|
|
|
$htmlValue = Injector::inst()->create('HTMLValue', $record->$fieldName);
|
2014-08-12 06:39:11 +02:00
|
|
|
$links = $this->parser->process($htmlValue);
|
2013-08-07 01:03:15 +02:00
|
|
|
|
2014-08-15 02:04:48 +02:00
|
|
|
// Highlight broken links in the content.
|
|
|
|
foreach ($links as $link) {
|
|
|
|
$classStr = trim($link['DOMReference']->getAttribute('class'));
|
|
|
|
if (!$classStr) {
|
|
|
|
$classes = array();
|
|
|
|
} else {
|
|
|
|
$classes = explode(' ', $classStr);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add or remove the broken class from the link, depending on the link status.
|
|
|
|
if ($link['Broken']) {
|
|
|
|
$classes = array_unique(array_merge($classes, array('ss-broken')));
|
|
|
|
} else {
|
|
|
|
$classes = array_diff($classes, array('ss-broken'));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!empty($classes)) {
|
|
|
|
$link['DOMReference']->setAttribute('class', implode(' ', $classes));
|
|
|
|
} else {
|
|
|
|
$link['DOMReference']->removeAttribute('class');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$record->$fieldName = $htmlValue->getContent();
|
|
|
|
|
2013-08-07 01:03:15 +02:00
|
|
|
// Populate link tracking for internal links & links to asset files.
|
2014-08-12 06:39:11 +02:00
|
|
|
foreach ($links as $link) {
|
|
|
|
switch ($link['Type']) {
|
|
|
|
case 'sitetree':
|
|
|
|
if ($link['Broken']) {
|
|
|
|
$record->HasBrokenLink = true;
|
|
|
|
} else {
|
|
|
|
$linkedPages[] = $link['Target'];
|
|
|
|
}
|
|
|
|
break;
|
2013-08-07 01:03:15 +02:00
|
|
|
|
2014-08-12 06:39:11 +02:00
|
|
|
case 'file':
|
|
|
|
if ($link['Broken']) {
|
|
|
|
$record->HasBrokenFile = true;
|
|
|
|
} else {
|
|
|
|
$linkedFiles[] = $link['Target'];
|
2013-08-07 01:03:15 +02:00
|
|
|
}
|
2014-08-12 06:39:11 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
if ($link['Broken']) {
|
2013-08-23 04:33:36 +02:00
|
|
|
$record->HasBrokenLink = true;
|
|
|
|
}
|
2014-08-12 06:39:11 +02:00
|
|
|
break;
|
2013-08-07 01:03:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add file tracking for image references
|
|
|
|
if($images = $htmlValue->getElementsByTagName('img')) foreach($images as $img) {
|
|
|
|
if($image = File::find($path = urldecode(Director::makeRelative($img->getAttribute('src'))))) {
|
|
|
|
$linkedFiles[] = $image->ID;
|
2014-08-12 06:39:11 +02:00
|
|
|
} else {
|
2013-08-07 01:03:15 +02:00
|
|
|
if(substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') {
|
|
|
|
$record->HasBrokenFile = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the "LinkTracking" many_many
|
|
|
|
if($record->ID && $record->many_many('LinkTracking') && $tracker = $record->LinkTracking()) {
|
|
|
|
$tracker->removeByFilter(sprintf(
|
|
|
|
'"FieldName" = \'%s\' AND "%s" = %d',
|
2014-08-15 02:04:48 +02:00
|
|
|
$fieldName,
|
2013-08-07 01:03:15 +02:00
|
|
|
$tracker->getForeignKey(),
|
|
|
|
$record->ID
|
|
|
|
));
|
|
|
|
|
|
|
|
if($linkedPages) foreach($linkedPages as $item) {
|
2014-08-15 02:04:48 +02:00
|
|
|
$tracker->add($item, array('FieldName' => $fieldName));
|
2013-08-07 01:03:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update the "ImageTracking" many_many
|
|
|
|
if($record->ID && $record->many_many('ImageTracking') && $tracker = $record->ImageTracking()) {
|
|
|
|
$tracker->removeByFilter(sprintf(
|
|
|
|
'"FieldName" = \'%s\' AND "%s" = %d',
|
2014-08-15 02:04:48 +02:00
|
|
|
$fieldName,
|
2013-08-07 01:03:15 +02:00
|
|
|
$tracker->getForeignKey(),
|
|
|
|
$record->ID
|
|
|
|
));
|
|
|
|
|
|
|
|
if($linkedFiles) foreach($linkedFiles as $item) {
|
2014-08-15 02:04:48 +02:00
|
|
|
$tracker->add($item, array('FieldName' => $fieldName));
|
2013-08-07 01:03:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-15 02:04:48 +02:00
|
|
|
public function augmentSyncLinkTracking() {
|
2013-08-07 01:03:15 +02:00
|
|
|
// Reset boolean broken flags
|
|
|
|
$this->owner->HasBrokenLink = false;
|
|
|
|
$this->owner->HasBrokenFile = false;
|
|
|
|
|
|
|
|
// Build a list of HTMLText fields
|
|
|
|
$allFields = $this->owner->db();
|
|
|
|
$htmlFields = array();
|
|
|
|
foreach($allFields as $field => $fieldSpec) {
|
|
|
|
if(preg_match('/([^(]+)/', $fieldSpec, $matches)) {
|
|
|
|
$class = $matches[0];
|
|
|
|
if(class_exists($class)){
|
|
|
|
if($class == 'HTMLText' || is_subclass_of($class, 'HTMLText')) $htmlFields[] = $field;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
foreach($htmlFields as $field) $this->trackLinksInField($field);
|
|
|
|
}
|
2014-08-12 06:39:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A helper object for extracting information about links.
|
|
|
|
*/
|
|
|
|
class SiteTreeLinkTracking_Parser {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
|
|
|
|
* extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
|
|
|
|
*
|
|
|
|
* @param SS_HTMLValue $htmlValue Object to parse the links from.
|
|
|
|
* @return array Associative array containing found links with the following field layout:
|
|
|
|
* Type: string, name of the link type
|
|
|
|
* Target: any, a reference to the target object, depends on the Type
|
|
|
|
* Anchor: string, anchor part of the link
|
|
|
|
* DOMReference: DOMElement, reference to the link to apply changes.
|
|
|
|
* Broken: boolean, a flag highlighting whether the link should be treated as broken.
|
|
|
|
*/
|
|
|
|
public function process(SS_HTMLValue $htmlValue) {
|
|
|
|
$results = array();
|
|
|
|
|
|
|
|
$links = $htmlValue->getElementsByTagName('a');
|
|
|
|
if(!$links) return $results;
|
|
|
|
|
|
|
|
foreach($links as $link) {
|
|
|
|
if (!$link->hasAttribute('href')) continue;
|
|
|
|
|
|
|
|
$href = Director::makeRelative($link->getAttribute('href'));
|
|
|
|
|
|
|
|
// Definitely broken links.
|
|
|
|
if($href == '' || $href[0] == '/') {
|
|
|
|
$results[] = array(
|
|
|
|
'Type' => 'broken',
|
|
|
|
'Target' => null,
|
|
|
|
'Anchor' => null,
|
|
|
|
'DOMReference' => $link,
|
|
|
|
'Broken' => true
|
|
|
|
);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Link to a page on this site.
|
|
|
|
$matches = array();
|
|
|
|
if(preg_match('/\[sitetree_link(?:\s*|%20|,)?id=([0-9]+)\](#(.*))?/i', $href, $matches)) {
|
|
|
|
$page = DataObject::get_by_id('SiteTree', $matches[1]);
|
|
|
|
if (!$page) {
|
|
|
|
// Page doesn't exist.
|
|
|
|
$broken = true;
|
|
|
|
} else if (!empty($matches[3]) && !preg_match("/(name|id)=\"{$matches[3]}\"/", $page->Content)) {
|
|
|
|
// Broken anchor on the target page.
|
|
|
|
$broken = true;
|
|
|
|
} else {
|
|
|
|
$broken = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
$results[] = array(
|
|
|
|
'Type' => 'sitetree',
|
|
|
|
'Target' => $matches[1],
|
|
|
|
'Anchor' => empty($matches[3]) ? null : $matches[3],
|
|
|
|
'DOMReference' => $link,
|
|
|
|
'Broken' => $broken
|
|
|
|
);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Link to a file on this site.
|
|
|
|
$matches = array();
|
|
|
|
if(preg_match('/\[file_link(?:\s*|%20|,)?id=([0-9]+)\]/i', $href, $matches)) {
|
|
|
|
$results[] = array(
|
|
|
|
'Type' => 'file',
|
|
|
|
'Target' => $matches[1],
|
|
|
|
'Anchor' => null,
|
|
|
|
'DOMReference' => $link,
|
|
|
|
'Broken' => !DataObject::get_by_id('File', $matches[1])
|
|
|
|
);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Local anchor.
|
|
|
|
$matches = array();
|
|
|
|
if(preg_match('/^#(.*)/i', $href, $matches)) {
|
|
|
|
$results[] = array(
|
|
|
|
'Type' => 'localanchor',
|
|
|
|
'Target' => null,
|
|
|
|
'Anchor' => $matches[1],
|
|
|
|
'DOMReference' => $link,
|
|
|
|
'Broken' => !preg_match("#(name|id)=\"{$matches[1]}\"#", $htmlValue->getContent())
|
|
|
|
);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return $results;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|