Refactor the link-tracking code and move it from framework.

This code has a dependency on SiteTree, so it fits much better in the
cms module.

Abstracted away the content parser so the same code can be reused both
in the render phase (to highlight the links) and in the write phase
(storing information about broken links and references).
This commit is contained in:
Mateusz Uzdowski 2014-08-12 16:39:11 +12:00
parent e4d9f62dd4
commit b41e081130
4 changed files with 280 additions and 45 deletions

View File

@ -4,3 +4,6 @@ LeftAndMain:
Security:
extensions:
- ErrorPageControllerExtension
HtmlEditorField:
extensions:
- SiteTreeLinkTracking_Highlighter

View File

@ -1,15 +1,27 @@
<?php
/**
* Adds tracking of links in any HTMLText fields which reference SiteTree or File items
* Adds tracking of links in any HTMLText fields which reference SiteTree or File items. Attaching this to any
* DataObject will add four fields which contain all links to SiteTree and File items referenced in any HTMLText fields,
* and two booleans to indicate if there are any broken links.
*
* Attaching this to any DataObject will add four fields which contain all links to SiteTree and File items
* referenced in any HTMLText fields, and two booleans to indicate if there are any broken links
* SiteTreeLinkTracking provides augmentSyncLinkTracking as an entry point for the tracking updater.
*
* Call augmentSyncLinkTracking to update those fields with any changes to those fields
* Additionally, a SiteTreeLinkTracking_Highlighter extension is provided which, when applied to HtmlEditorField,
* will reuse the link SiteTreeLinkTracking's parser to add "ss-broken" classes to all broken links found this way.
* The resulting class will be saved to the Content on the subsequent write operation. If links are found to be
* no longer broken, the class will be removed on the next write.
*
* The underlying SiteTreeLinkTracking_Parser can recognise broken internal links, broken internal anchors, and some
* typical broken links such as empty href, or a link starting with a slash.
*/
class SiteTreeLinkTracking extends DataExtension {
public $parser;
private static $dependencies = array(
'parser' => '%$SiteTreeLinkTracking_Parser'
);
private static $db = array(
"HasBrokenFile" => "Boolean",
"HasBrokenLink" => "Boolean"
@ -32,38 +44,32 @@ class SiteTreeLinkTracking extends DataExtension {
$linkedFiles = array();
$htmlValue = Injector::inst()->create('HTMLValue', $record->$field);
$links = $this->parser->process($htmlValue);
// Populate link tracking for internal links & links to asset files.
if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) {
$href = Director::makeRelative($link->getAttribute('href'));
if($href) {
if(preg_match('/\[(sitetree|file)_link[,\s]id=([0-9]+)\]/i', $href, $matches)) {
$type = $matches[1];
$id = $matches[2];
if($type === 'sitetree') {
if(SiteTree::get()->byID($id)) {
$linkedPages[] = $id;
} else {
$record->HasBrokenLink = true;
}
} else if($type === 'file') {
if(File::get()->byID($id)) {
$linkedFiles[] = $id;
} else {
$record->HasBrokenFile = true;
}
foreach ($links as $link) {
switch ($link['Type']) {
case 'sitetree':
if ($link['Broken']) {
$record->HasBrokenLink = true;
} else {
$linkedPages[] = $link['Target'];
}
} else if($href == '' || $href[0] == '/') {
$record->HasBrokenLink = true;
} else if(stristr($href, '#')) {
// Deals-to broken anchors (Links with no anchor)
$find = preg_replace("/^(.+)?#(.+)+$/","$2", $href);
if(!preg_match("#(name|id)=\"{$find}\"#", $record->$field)) {
break;
case 'file':
if ($link['Broken']) {
$record->HasBrokenFile = true;
} else {
$linkedFiles[] = $link['Target'];
}
break;
default:
if ($link['Broken']) {
$record->HasBrokenLink = true;
}
}
break;
}
}
@ -71,8 +77,7 @@ class SiteTreeLinkTracking extends DataExtension {
if($images = $htmlValue->getElementsByTagName('img')) foreach($images as $img) {
if($image = File::find($path = urldecode(Director::makeRelative($img->getAttribute('src'))))) {
$linkedFiles[] = $image->ID;
}
else {
} else {
if(substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') {
$record->HasBrokenFile = true;
}
@ -108,7 +113,6 @@ class SiteTreeLinkTracking extends DataExtension {
}
}
function augmentSyncLinkTracking() {
// Reset boolean broken flags
$this->owner->HasBrokenLink = false;
@ -128,4 +132,150 @@ class SiteTreeLinkTracking extends DataExtension {
foreach($htmlFields as $field) $this->trackLinksInField($field);
}
}
}
/**
* Extension for enabling highlighting of broken links in the HtmlEditorFields.
*/
class SiteTreeLinkTracking_Highlighter extends Extension {
public $parser;
private static $dependencies = array(
'parser' => '%$SiteTreeLinkTracking_Parser'
);
/**
* Adds an ability to highlight broken links in the content.
* It reuses the parser the SiteTreeLinkTracking uses for maintaining the references and the "broken" flags
* to make sure all pages listed in the BrokenLinkChecker highlight these in their content.
*/
public function onBeforeRender($field) {
// Handle situation when the field has been customised, i.e. via $properties on the HtmlEditorField::Field call.
$obj = $this->owner->getCustomisedObj() ?: $this->owner;
$value = $obj->value;
// Parse the text as DOM.
$htmlValue = Injector::inst()->create('HTMLValue', $value);
$links = $this->parser->process($htmlValue);
foreach ($links as $link) {
$classStr = $link['DOMReference']->getAttribute('class');
$classes = explode(' ', $classStr);
// Add or remove the broken class from the link, depending on the link status.
if ($link['Broken']) {
$classes = array_unique(array_merge($classes, array('ss-broken')));
} else {
$classes = array_diff($classes, array('ss-broken'));
}
$link['DOMReference']->setAttribute('class', implode(' ', $classes));
}
$obj->customise(array(
'Value' => htmlentities($htmlValue->getContent(), ENT_COMPAT, 'UTF-8')
));
}
}
/**
* A helper object for extracting information about links.
*/
class SiteTreeLinkTracking_Parser {
/**
* Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
* extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
*
* @param SS_HTMLValue $htmlValue Object to parse the links from.
* @return array Associative array containing found links with the following field layout:
* Type: string, name of the link type
* Target: any, a reference to the target object, depends on the Type
* Anchor: string, anchor part of the link
* DOMReference: DOMElement, reference to the link to apply changes.
* Broken: boolean, a flag highlighting whether the link should be treated as broken.
*/
public function process(SS_HTMLValue $htmlValue) {
$results = array();
$links = $htmlValue->getElementsByTagName('a');
if(!$links) return $results;
foreach($links as $link) {
if (!$link->hasAttribute('href')) continue;
$href = Director::makeRelative($link->getAttribute('href'));
// Definitely broken links.
if($href == '' || $href[0] == '/') {
$results[] = array(
'Type' => 'broken',
'Target' => null,
'Anchor' => null,
'DOMReference' => $link,
'Broken' => true
);
continue;
}
// Link to a page on this site.
$matches = array();
if(preg_match('/\[sitetree_link(?:\s*|%20|,)?id=([0-9]+)\](#(.*))?/i', $href, $matches)) {
$page = DataObject::get_by_id('SiteTree', $matches[1]);
if (!$page) {
// Page doesn't exist.
$broken = true;
} else if (!empty($matches[3]) && !preg_match("/(name|id)=\"{$matches[3]}\"/", $page->Content)) {
// Broken anchor on the target page.
$broken = true;
} else {
$broken = false;
}
$results[] = array(
'Type' => 'sitetree',
'Target' => $matches[1],
'Anchor' => empty($matches[3]) ? null : $matches[3],
'DOMReference' => $link,
'Broken' => $broken
);
continue;
}
// Link to a file on this site.
$matches = array();
if(preg_match('/\[file_link(?:\s*|%20|,)?id=([0-9]+)\]/i', $href, $matches)) {
$results[] = array(
'Type' => 'file',
'Target' => $matches[1],
'Anchor' => null,
'DOMReference' => $link,
'Broken' => !DataObject::get_by_id('File', $matches[1])
);
continue;
}
// Local anchor.
$matches = array();
if(preg_match('/^#(.*)/i', $href, $matches)) {
$results[] = array(
'Type' => 'localanchor',
'Target' => null,
'Anchor' => $matches[1],
'DOMReference' => $link,
'Broken' => !preg_match("#(name|id)=\"{$matches[1]}\"#", $htmlValue->getContent())
);
continue;
}
}
return $results;
}
}

View File

@ -5,7 +5,7 @@
*/
class SiteTreeBrokenLinksTest extends SapphireTest {
protected static $fixture_file = 'SiteTreeBrokenLinksTest.yml';
public function testBrokenLinksBetweenPages() {
$obj = $this->objFromFixture('Page','content');
@ -17,31 +17,44 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$obj->syncLinkTracking();
$this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link');
}
public function testBrokenAnchorBetweenPages() {
$obj = $this->objFromFixture('Page','content');
$target = $this->objFromFixture('Page', 'about');
$obj->Content = "<a href=\"[sitetree_link,id={$target->ID}]#no-anchor-here\">this is a broken link</a>";
$obj->syncLinkTracking();
$this->assertTrue($obj->HasBrokenLink, 'Page has a broken link');
$obj->Content = "<a href=\"[sitetree_link,id={$target->ID}]#yes-anchor-here\">this is not a broken link</a>";
$obj->syncLinkTracking();
$this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link');
}
public function testBrokenVirtualPages() {
$obj = $this->objFromFixture('Page','content');
$vp = new VirtualPage();
$vp->CopyContentFromID = $obj->ID;
$vp->CopyContentFromID = $obj->ID;
$vp->syncLinkTracking();
$this->assertFalse($vp->HasBrokenLink, 'Working virtual page is NOT marked as broken');
$vp->CopyContentFromID = 12345678;
$vp->CopyContentFromID = 12345678;
$vp->syncLinkTracking();
$this->assertTrue($vp->HasBrokenLink, 'Broken virtual page IS marked as such');
}
public function testBrokenInternalRedirectorPages() {
$obj = $this->objFromFixture('Page','content');
$rp = new RedirectorPage();
$rp->RedirectionType = 'Internal';
$rp->LinkToID = $obj->ID;
$rp->LinkToID = $obj->ID;
$rp->syncLinkTracking();
$this->assertFalse($rp->HasBrokenLink, 'Working redirector page is NOT marked as broken');
$rp->LinkToID = 12345678;
$rp->LinkToID = 12345678;
$rp->syncLinkTracking();
$this->assertTrue($rp->HasBrokenLink, 'Broken redirector page IS marked as such');
}
@ -77,7 +90,8 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$liveObj = Versioned::get_one_by_stage("SiteTree", "Live", "\"SiteTree\".\"ID\" = $obj->ID");
$this->assertEquals(1, $liveObj->HasBrokenFile);
}
}
public function testDeletingMarksBackLinkedPagesAsBroken() {
$this->logInWithPermission('ADMIN');
@ -142,7 +156,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
WHERE \"ID\" = $linkSrc->ID")->value());
}
public function testRestoreFixesBrokenLinks() {
// Create page and virtual page
$p = new Page();
@ -300,5 +313,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest {
$obj->syncLinkTracking();
$this->assertFalse($obj->HasBrokenLink, 'Page doesn\'t have a broken anchor or skiplink');
}
}

View File

@ -0,0 +1,68 @@
<?php
class SiteTreeLinkTrackingTest extends SapphireTest {
function isBroken($content) {
$parser = new SiteTreeLinkTracking_Parser();
$htmlValue = Injector::inst()->create('HTMLValue', $content);
$links = $parser->process($htmlValue);
if (empty($links[0])) return false;
return $links[0]['Broken'];
}
function testParser() {
$this->assertTrue($this->isBroken('<a href="[sitetree_link,id=123]">link</a>'));
$this->assertTrue($this->isBroken('<a href="[sitetree_link,id=123]#no-such-anchor">link</a>'));
$this->assertTrue($this->isBroken('<a href="[file_link,id=123]">link</a>'));
$this->assertTrue($this->isBroken('<a href="">link</a>'));
$this->assertTrue($this->isBroken('<a href="/">link</a>'));
$this->assertFalse($this->isBroken('<a name="anchor">anchor</a>'));
$this->assertFalse($this->isBroken('<a id="anchor">anchor</a>'));
$page = new Page();
$page->Content = '<a name="yes-name-anchor">name</a><a id="yes-id-anchor">id</a>';
$page->write();
$file = new File();
$file->write();
$this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]\">link</a>"));
$this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]#yes-name-anchor\">link</a>"));
$this->assertFalse($this->isBroken("<a href=\"[sitetree_link,id=$page->ID]#yes-id-anchor\">link</a>"));
$this->assertFalse($this->isBroken("<a href=\"[file_link,id=$file->ID]\">link</a>"));
}
function highlight($content) {
$field = new SiteTreeLinkTrackingTest_Field('Test');
$field->setValue($content);
$newContent = html_entity_decode($field->Field(), ENT_COMPAT, 'UTF-8');
return $newContent;
}
function testHighlighter() {
$content = $this->highlight('<a href="[sitetree_link,id=123]" class="existing-class">link</a>');
$this->assertEquals(substr_count($content, 'ss-broken'), 1, 'A ss-broken class is added to the broken link.');
$this->assertEquals(substr_count($content, 'existing-class'), 1, 'Existing class is not removed.');
$content = $this->highlight('<a href="[sitetree_link,id=123]">link</a>');
$this->assertEquals(substr_count($content, 'ss-broken'), 1, 'ss-broken class is added to the broken link.');
$page = new Page();
$page->Content = '';
$page->write();
$content = $this->highlight(
"<a href=\"[sitetree_link,id=$page->ID]\" class=\"existing-class ss-broken ss-broken\">link</a>"
);
$this->assertEquals(substr_count($content, 'ss-broken'), 0, 'All ss-broken classes are removed from good link');
$this->assertEquals(substr_count($content, 'existing-class'), 1, 'Existing class is not removed.');
}
}
class SiteTreeLinkTrackingTest_Field extends HtmlEditorField implements TestOnly {
private static $extensions = array(
'SiteTreeLinkTracking_Highlighter'
);
}