Merge pull request #1077 from mateusz/link-tracking

Refactor the link-tracking code and move it from framework.
This commit is contained in:
Sean Harvey 2014-08-14 14:11:28 +12:00
commit 8baa126c1b
4 changed files with 280 additions and 45 deletions

View File

@ -4,3 +4,6 @@ LeftAndMain:
Security: Security:
extensions: extensions:
- ErrorPageControllerExtension - ErrorPageControllerExtension
HtmlEditorField:
extensions:
- SiteTreeLinkTracking_Highlighter

View File

@ -1,15 +1,27 @@
<?php <?php
/** /**
* Adds tracking of links in any HTMLText fields which reference SiteTree or File items * Adds tracking of links in any HTMLText fields which reference SiteTree or File items. Attaching this to any
* DataObject will add four fields which contain all links to SiteTree and File items referenced in any HTMLText fields,
* and two booleans to indicate if there are any broken links.
* *
* Attaching this to any DataObject will add four fields which contain all links to SiteTree and File items * SiteTreeLinkTracking provides augmentSyncLinkTracking as an entry point for the tracking updater.
* referenced in any HTMLText fields, and two booleans to indicate if there are any broken links
* *
* Call augmentSyncLinkTracking to update those fields with any changes to those fields * Additionally, a SiteTreeLinkTracking_Highlighter extension is provided which, when applied to HtmlEditorField,
* will reuse SiteTreeLinkTracking's link parser to add "ss-broken" classes to all broken links found this way.
* The resulting class will be saved to the Content on the subsequent write operation. If links are found to be
* no longer broken, the class will be removed on the next write.
*
* The underlying SiteTreeLinkTracking_Parser can recognise broken internal links, broken internal anchors, and some
* typical broken links such as empty href, or a link starting with a slash.
*/ */
class SiteTreeLinkTracking extends DataExtension { class SiteTreeLinkTracking extends DataExtension {
public $parser;
private static $dependencies = array(
'parser' => '%$SiteTreeLinkTracking_Parser'
);
private static $db = array( private static $db = array(
"HasBrokenFile" => "Boolean", "HasBrokenFile" => "Boolean",
"HasBrokenLink" => "Boolean" "HasBrokenLink" => "Boolean"
@ -32,38 +44,32 @@ class SiteTreeLinkTracking extends DataExtension {
$linkedFiles = array(); $linkedFiles = array();
$htmlValue = Injector::inst()->create('HTMLValue', $record->$field); $htmlValue = Injector::inst()->create('HTMLValue', $record->$field);
$links = $this->parser->process($htmlValue);
// Populate link tracking for internal links & links to asset files. // Populate link tracking for internal links & links to asset files.
if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { foreach ($links as $link) {
$href = Director::makeRelative($link->getAttribute('href')); switch ($link['Type']) {
case 'sitetree':
if($href) { if ($link['Broken']) {
if(preg_match('/\[(sitetree|file)_link[,\s]id=([0-9]+)\]/i', $href, $matches)) {
$type = $matches[1];
$id = $matches[2];
if($type === 'sitetree') {
if(SiteTree::get()->byID($id)) {
$linkedPages[] = $id;
} else {
$record->HasBrokenLink = true; $record->HasBrokenLink = true;
}
} else if($type === 'file') {
if(File::get()->byID($id)) {
$linkedFiles[] = $id;
} else { } else {
$linkedPages[] = $link['Target'];
}
break;
case 'file':
if ($link['Broken']) {
$record->HasBrokenFile = true; $record->HasBrokenFile = true;
} else {
$linkedFiles[] = $link['Target'];
} }
} break;
} else if($href == '' || $href[0] == '/') {
$record->HasBrokenLink = true; default:
} else if(stristr($href, '#')) { if ($link['Broken']) {
// Deals-to broken anchors (Links with no anchor)
$find = preg_replace("/^(.+)?#(.+)+$/","$2", $href);
if(!preg_match("#(name|id)=\"{$find}\"#", $record->$field)) {
$record->HasBrokenLink = true; $record->HasBrokenLink = true;
} }
} break;
} }
} }
@ -71,8 +77,7 @@ class SiteTreeLinkTracking extends DataExtension {
if($images = $htmlValue->getElementsByTagName('img')) foreach($images as $img) { if($images = $htmlValue->getElementsByTagName('img')) foreach($images as $img) {
if($image = File::find($path = urldecode(Director::makeRelative($img->getAttribute('src'))))) { if($image = File::find($path = urldecode(Director::makeRelative($img->getAttribute('src'))))) {
$linkedFiles[] = $image->ID; $linkedFiles[] = $image->ID;
} } else {
else {
if(substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') { if(substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') {
$record->HasBrokenFile = true; $record->HasBrokenFile = true;
} }
@ -108,7 +113,6 @@ class SiteTreeLinkTracking extends DataExtension {
} }
} }
function augmentSyncLinkTracking() { function augmentSyncLinkTracking() {
// Reset boolean broken flags // Reset boolean broken flags
$this->owner->HasBrokenLink = false; $this->owner->HasBrokenLink = false;
@ -129,3 +133,149 @@ class SiteTreeLinkTracking extends DataExtension {
foreach($htmlFields as $field) $this->trackLinksInField($field); foreach($htmlFields as $field) $this->trackLinksInField($field);
} }
} }
/**
 * Extension for enabling highlighting of broken links in the HtmlEditorFields.
 */
class SiteTreeLinkTracking_Highlighter extends Extension {

    /**
     * Link parser used to find links and their broken status.
     * Declared in $dependencies below as '%$SiteTreeLinkTracking_Parser'.
     */
    public $parser;

    private static $dependencies = array(
        'parser' => '%$SiteTreeLinkTracking_Parser'
    );

    /**
     * Adds an ability to highlight broken links in the content.
     * It reuses the parser the SiteTreeLinkTracking uses for maintaining the references and the "broken" flags
     * to make sure all pages listed in the BrokenLinkChecker highlight these in their content.
     *
     * Every link returned by the parser gets the "ss-broken" class added when it is flagged as broken and
     * removed otherwise. The rewritten, entity-encoded markup is then exposed as "Value" on the rendered object.
     *
     * @param mixed $field Passed in by the caller; not used by this method.
     */
    public function onBeforeRender($field) {
        // Handle situation when the field has been customised, i.e. via $properties on the HtmlEditorField::Field call.
        $obj = $this->owner->getCustomisedObj() ?: $this->owner;
        $value = $obj->value;

        // Parse the text as DOM.
        $htmlValue = Injector::inst()->create('HTMLValue', $value);
        $links = $this->parser->process($htmlValue);

        foreach ($links as $link) {
            $element = $link['DOMReference'];

            // Split on any whitespace and drop empty tokens, so a missing or empty class attribute
            // yields an empty list instead of array('') (which would produce class=" ss-broken").
            $classes = preg_split('/\s+/', (string)$element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);

            // Add or remove the broken class from the link, depending on the link status.
            if ($link['Broken']) {
                $classes = array_unique(array_merge($classes, array('ss-broken')));
            } else {
                $classes = array_diff($classes, array('ss-broken'));
            }

            if ($classes) {
                $element->setAttribute('class', implode(' ', $classes));
            } else if ($element->hasAttribute('class')) {
                // Nothing left to express: do not leave an empty class="" attribute in the content.
                $element->removeAttribute('class');
            }
        }

        $obj->customise(array(
            'Value' => htmlentities($htmlValue->getContent(), ENT_COMPAT, 'UTF-8')
        ));
    }

}
/**
 * A helper object for extracting information about links.
 */
class SiteTreeLinkTracking_Parser {

    /**
     * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
     * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
     *
     * Only <a> elements that carry an href attribute are considered. Links that match none of the recognised
     * shapes (definitely broken, sitetree shortcode, file shortcode, local anchor) are not included in the result.
     *
     * @param SS_HTMLValue $htmlValue Object to parse the links from.
     * @return array Associative array containing found links with the following field layout:
     *		Type: string, name of the link type ('broken', 'sitetree', 'file' or 'localanchor')
     *		Target: any, a reference to the target object, depends on the Type
     *		Anchor: string, anchor part of the link
     *		DOMReference: DOMElement, reference to the link to apply changes.
     *		Broken: boolean, a flag highlighting whether the link should be treated as broken.
     */
    public function process(SS_HTMLValue $htmlValue) {
        $results = array();

        $links = $htmlValue->getElementsByTagName('a');
        if(!$links) return $results;

        foreach($links as $link) {
            // Plain named anchors (<a name="...">) have no href and are not links to check.
            if (!$link->hasAttribute('href')) continue;

            $href = Director::makeRelative($link->getAttribute('href'));

            // Definitely broken links: empty href, or a relative URL still starting with a slash.
            if($href == '' || $href[0] == '/') {
                $results[] = array(
                    'Type' => 'broken',
                    'Target' => null,
                    'Anchor' => null,
                    'DOMReference' => $link,
                    'Broken' => true
                );

                continue;
            }

            // Link to a page on this site, optionally followed by "#anchor".
            $matches = array();
            if(preg_match('/\[sitetree_link(?:\s*|%20|,)?id=([0-9]+)\](#(.*))?/i', $href, $matches)) {
                $anchor = empty($matches[3]) ? null : $matches[3];
                $page = DataObject::get_by_id('SiteTree', $matches[1]);

                if (!$page) {
                    // Page doesn't exist.
                    $broken = true;
                } else if ($anchor !== null && !$this->hasAnchor($anchor, $page->Content)) {
                    // Broken anchor on the target page.
                    $broken = true;
                } else {
                    $broken = false;
                }

                $results[] = array(
                    'Type' => 'sitetree',
                    'Target' => $matches[1],
                    'Anchor' => $anchor,
                    'DOMReference' => $link,
                    'Broken' => $broken
                );

                continue;
            }

            // Link to a file on this site.
            $matches = array();
            if(preg_match('/\[file_link(?:\s*|%20|,)?id=([0-9]+)\]/i', $href, $matches)) {
                $results[] = array(
                    'Type' => 'file',
                    'Target' => $matches[1],
                    'Anchor' => null,
                    'DOMReference' => $link,
                    'Broken' => !DataObject::get_by_id('File', $matches[1])
                );

                continue;
            }

            // Local anchor: must be defined somewhere in the same content.
            $matches = array();
            if(preg_match('/^#(.*)/i', $href, $matches)) {
                $results[] = array(
                    'Type' => 'localanchor',
                    'Target' => null,
                    'Anchor' => $matches[1],
                    'DOMReference' => $link,
                    'Broken' => !$this->hasAnchor($matches[1], $htmlValue->getContent())
                );

                continue;
            }
        }

        return $results;
    }

    /**
     * Checks whether the markup contains a name="..." or id="..." attribute equal to the given anchor.
     *
     * The anchor is escaped with preg_quote() so that characters such as "/", "#", "." or "(" in the anchor
     * are matched literally instead of corrupting the pattern or being treated as regex syntax.
     *
     * @param string $anchor Anchor name, without the leading "#".
     * @param string $html Markup to search.
     * @return boolean True when a matching name/id attribute is found.
     */
    private function hasAnchor($anchor, $html) {
        $pattern = '/(name|id)="' . preg_quote($anchor, '/') . '"/';
        return (bool)preg_match($pattern, (string)$html);
    }

}

View File

@ -18,6 +18,19 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link'); $this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link');
} }
public function testBrokenAnchorBetweenPages() {
$obj = $this->objFromFixture('Page','content');
$target = $this->objFromFixture('Page', 'about');
$obj->Content = "<a href=\"[sitetree_link,id={$target->ID}]#no-anchor-here\">this is a broken link</a>";
$obj->syncLinkTracking();
$this->assertTrue($obj->HasBrokenLink, 'Page has a broken link');
$obj->Content = "<a href=\"[sitetree_link,id={$target->ID}]#yes-anchor-here\">this is not a broken link</a>";
$obj->syncLinkTracking();
$this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link');
}
public function testBrokenVirtualPages() { public function testBrokenVirtualPages() {
$obj = $this->objFromFixture('Page','content'); $obj = $this->objFromFixture('Page','content');
$vp = new VirtualPage(); $vp = new VirtualPage();
@ -78,6 +91,7 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$liveObj = Versioned::get_one_by_stage("SiteTree", "Live", "\"SiteTree\".\"ID\" = $obj->ID"); $liveObj = Versioned::get_one_by_stage("SiteTree", "Live", "\"SiteTree\".\"ID\" = $obj->ID");
$this->assertEquals(1, $liveObj->HasBrokenFile); $this->assertEquals(1, $liveObj->HasBrokenFile);
} }
public function testDeletingMarksBackLinkedPagesAsBroken() { public function testDeletingMarksBackLinkedPagesAsBroken() {
$this->logInWithPermission('ADMIN'); $this->logInWithPermission('ADMIN');
@ -142,7 +156,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
WHERE \"ID\" = $linkSrc->ID")->value()); WHERE \"ID\" = $linkSrc->ID")->value());
} }
public function testRestoreFixesBrokenLinks() { public function testRestoreFixesBrokenLinks() {
// Create page and virtual page // Create page and virtual page
$p = new Page(); $p = new Page();
@ -300,5 +313,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$obj->syncLinkTracking(); $obj->syncLinkTracking();
$this->assertFalse($obj->HasBrokenLink, 'Page doesn\'t have a broken anchor or skiplink'); $this->assertFalse($obj->HasBrokenLink, 'Page doesn\'t have a broken anchor or skiplink');
} }
} }

View File

@ -0,0 +1,68 @@
<?php
/**
 * Tests for SiteTreeLinkTracking_Parser and SiteTreeLinkTracking_Highlighter.
 */
class SiteTreeLinkTrackingTest extends SapphireTest {

    /**
     * Runs the parser over the given markup and reports the status of the first link found.
     *
     * @param string $content HTML to parse.
     * @return boolean The 'Broken' flag of the first parsed link, or false when the parser returned no links.
     */
    public function isBroken($content) {
        $parser = new SiteTreeLinkTracking_Parser();

        $htmlValue = Injector::inst()->create('HTMLValue', $content);
        $links = $parser->process($htmlValue);

        if (empty($links[0])) return false;
        return $links[0]['Broken'];
    }

    /**
     * Checks the parser's broken/not-broken verdict for each supported link shape.
     */
    public function testParser() {
        // ID 123 is expected not to exist in the test database.
        $this->assertTrue($this->isBroken('<a href="[sitetree_link,id=123]">link</a>'));
        $this->assertTrue($this->isBroken('<a href="[sitetree_link,id=123]#no-such-anchor">link</a>'));
        $this->assertTrue($this->isBroken('<a href="[file_link,id=123]">link</a>'));
        $this->assertTrue($this->isBroken('<a href="">link</a>'));
        $this->assertTrue($this->isBroken('<a href="/">link</a>'));

        // Anchors without an href are not links and must not be reported.
        $this->assertFalse($this->isBroken('<a name="anchor">anchor</a>'));
        $this->assertFalse($this->isBroken('<a id="anchor">anchor</a>'));

        $page = new Page();
        $page->Content = '<a name="yes-name-anchor">name</a><a id="yes-id-anchor">id</a>';
        $page->write();

        $file = new File();
        $file->write();

        $this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]\">link</a>"));
        $this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]#yes-name-anchor\">link</a>"));
        $this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]#yes-id-anchor\">link</a>"));
        $this->assertFalse($this->isBroken("<a href=\"[file_link,id=$file->ID]\">link</a>"));
    }

    /**
     * Renders the given markup through a field that has the highlighter extension applied.
     *
     * @param string $content HTML to set as the field value.
     * @return string The rendered field output with HTML entities decoded.
     */
    public function highlight($content) {
        $field = new SiteTreeLinkTrackingTest_Field('Test');
        $field->setValue($content);

        $newContent = html_entity_decode($field->Field(), ENT_COMPAT, 'UTF-8');
        return $newContent;
    }

    /**
     * Checks that "ss-broken" is added to broken links, removed from good ones, and other classes are kept.
     */
    public function testHighlighter() {
        // assertEquals takes the expected value first, then the actual one.
        $content = $this->highlight('<a href="[sitetree_link,id=123]" class="existing-class">link</a>');
        $this->assertEquals(1, substr_count($content, 'ss-broken'), 'A ss-broken class is added to the broken link.');
        $this->assertEquals(1, substr_count($content, 'existing-class'), 'Existing class is not removed.');

        $content = $this->highlight('<a href="[sitetree_link,id=123]">link</a>');
        $this->assertEquals(1, substr_count($content, 'ss-broken'), 'ss-broken class is added to the broken link.');

        $page = new Page();
        $page->Content = '';
        $page->write();

        $content = $this->highlight(
            "<a href=\"[sitetree_link,id=$page->ID]\" class=\"existing-class ss-broken ss-broken\">link</a>"
        );
        $this->assertEquals(0, substr_count($content, 'ss-broken'), 'All ss-broken classes are removed from good link');
        $this->assertEquals(1, substr_count($content, 'existing-class'), 'Existing class is not removed.');
    }

}
/**
 * Test-only HtmlEditorField subclass that lists SiteTreeLinkTracking_Highlighter in its extensions.
 */
class SiteTreeLinkTrackingTest_Field extends HtmlEditorField implements TestOnly {

    private static $extensions = array('SiteTreeLinkTracking_Highlighter');

}