From b41e081130c091b7ccdcc9499a499ae792c85614 Mon Sep 17 00:00:00 2001 From: Mateusz Uzdowski Date: Tue, 12 Aug 2014 16:39:11 +1200 Subject: [PATCH] Refactor the link-tracking code and move it from framework. This code has a dependency on SiteTree, so it fits much better in the cms module. Abstracted away the content parser so the same code can be reused both in the render phase (to highlight the links) and in the write phase (storing information about broken links and references). --- _config/config.yml | 3 + code/model/SiteTreeLinkTracking.php | 222 +++++++++++++++++++---- tests/model/SiteTreeBrokenLinksTest.php | 32 +++- tests/model/SiteTreeLinkTrackingTest.php | 68 +++++++ 4 files changed, 280 insertions(+), 45 deletions(-) create mode 100644 tests/model/SiteTreeLinkTrackingTest.php diff --git a/_config/config.yml b/_config/config.yml index 8b91fc39..8e169ffa 100644 --- a/_config/config.yml +++ b/_config/config.yml @@ -4,3 +4,6 @@ LeftAndMain: Security: extensions: - ErrorPageControllerExtension +HtmlEditorField: + extensions: + - SiteTreeLinkTracking_Highlighter diff --git a/code/model/SiteTreeLinkTracking.php b/code/model/SiteTreeLinkTracking.php index da661517..5286e572 100644 --- a/code/model/SiteTreeLinkTracking.php +++ b/code/model/SiteTreeLinkTracking.php @@ -1,15 +1,27 @@ '%$SiteTreeLinkTracking_Parser' + ); + private static $db = array( "HasBrokenFile" => "Boolean", "HasBrokenLink" => "Boolean" @@ -32,38 +44,32 @@ class SiteTreeLinkTracking extends DataExtension { $linkedFiles = array(); $htmlValue = Injector::inst()->create('HTMLValue', $record->$field); + $links = $this->parser->process($htmlValue); // Populate link tracking for internal links & links to asset files. - if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { - $href = Director::makeRelative($link->getAttribute('href')); - - if($href) { - if(preg_match('/\[(sitetree|file)_link[,\s]id=([0-9]+)\]/i', $href, $matches)) { - $type = $matches[1]; - $id = $matches[2]; - - if($type === 'sitetree') { - if(SiteTree::get()->byID($id)) { - $linkedPages[] = $id; - } else { - $record->HasBrokenLink = true; - } - } else if($type === 'file') { - if(File::get()->byID($id)) { - $linkedFiles[] = $id; - } else { - $record->HasBrokenFile = true; - } + foreach ($links as $link) { + switch ($link['Type']) { + case 'sitetree': + if ($link['Broken']) { + $record->HasBrokenLink = true; + } else { + $linkedPages[] = $link['Target']; } - } else if($href == '' || $href[0] == '/') { - $record->HasBrokenLink = true; - } else if(stristr($href, '#')) { - // Deals-to broken anchors (Links with no anchor) - $find = preg_replace("/^(.+)?#(.+)+$/","$2", $href); - if(!preg_match("#(name|id)=\"{$find}\"#", $record->$field)) { + break; + + case 'file': + if ($link['Broken']) { + $record->HasBrokenFile = true; + } else { + $linkedFiles[] = $link['Target']; + } + break; + + default: + if ($link['Broken']) { $record->HasBrokenLink = true; } - } + break; } } @@ -71,8 +77,7 @@ class SiteTreeLinkTracking extends DataExtension { if($images = $htmlValue->getElementsByTagName('img')) foreach($images as $img) { if($image = File::find($path = urldecode(Director::makeRelative($img->getAttribute('src'))))) { $linkedFiles[] = $image->ID; - } - else { + } else { if(substr($path, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR . '/') { $record->HasBrokenFile = true; } @@ -108,7 +113,6 @@ class SiteTreeLinkTracking extends DataExtension { } } - function augmentSyncLinkTracking() { // Reset boolean broken flags $this->owner->HasBrokenLink = false; @@ -128,4 +132,150 @@ class SiteTreeLinkTracking extends DataExtension { foreach($htmlFields as $field) $this->trackLinksInField($field); } -} \ No newline at end of file +} + +/** + * Extension for enabling highlighting of broken links in the HtmlEditorFields. + */ +class SiteTreeLinkTracking_Highlighter extends Extension { + + public $parser; + + private static $dependencies = array( + 'parser' => '%$SiteTreeLinkTracking_Parser' + ); + + /** + * Adds an ability to highlight broken links in the content. + * It reuses the parser the SiteTreeLinkTracking uses for maintaining the references and the "broken" flags + * to make sure all pages listed in the BrokenLinkChecker highlight these in their content. + */ + public function onBeforeRender($field) { + // Handle situation when the field has been customised, i.e. via $properties on the HtmlEditorField::Field call. + $obj = $this->owner->getCustomisedObj() ?: $this->owner; + $value = $obj->value; + + // Parse the text as DOM. + $htmlValue = Injector::inst()->create('HTMLValue', $value); + $links = $this->parser->process($htmlValue); + + foreach ($links as $link) { + $classStr = $link['DOMReference']->getAttribute('class'); + $classes = explode(' ', $classStr); + + // Add or remove the broken class from the link, depending on the link status. + if ($link['Broken']) { + $classes = array_unique(array_merge($classes, array('ss-broken'))); + } else { + $classes = array_diff($classes, array('ss-broken')); + } + $link['DOMReference']->setAttribute('class', implode(' ', $classes)); + } + + $obj->customise(array( + 'Value' => htmlentities($htmlValue->getContent(), ENT_COMPAT, 'UTF-8') + )); + } + +} + +/** + * A helper object for extracting information about links. + */ +class SiteTreeLinkTracking_Parser { + + /** + * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches + * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference). + * + * @param SS_HTMLValue $htmlValue Object to parse the links from. + * @return array Associative array containing found links with the following field layout: + * Type: string, name of the link type + * Target: any, a reference to the target object, depends on the Type + * Anchor: string, anchor part of the link + * DOMReference: DOMElement, reference to the link to apply changes. + * Broken: boolean, a flag highlighting whether the link should be treated as broken. + */ + public function process(SS_HTMLValue $htmlValue) { + $results = array(); + + $links = $htmlValue->getElementsByTagName('a'); + if(!$links) return $results; + + foreach($links as $link) { + if (!$link->hasAttribute('href')) continue; + + $href = Director::makeRelative($link->getAttribute('href')); + + // Definitely broken links. + if($href == '' || $href[0] == '/') { + $results[] = array( + 'Type' => 'broken', + 'Target' => null, + 'Anchor' => null, + 'DOMReference' => $link, + 'Broken' => true + ); + + continue; + } + + // Link to a page on this site. + $matches = array(); + if(preg_match('/\[sitetree_link(?:\s*|%20|,)?id=([0-9]+)\](#(.*))?/i', $href, $matches)) { + $page = DataObject::get_by_id('SiteTree', $matches[1]); + if (!$page) { + // Page doesn't exist. + $broken = true; + } else if (!empty($matches[3]) && !preg_match("/(name|id)=\"{$matches[3]}\"/", $page->Content)) { + // Broken anchor on the target page. + $broken = true; + } else { + $broken = false; + } + + $results[] = array( + 'Type' => 'sitetree', + 'Target' => $matches[1], + 'Anchor' => empty($matches[3]) ? null : $matches[3], + 'DOMReference' => $link, + 'Broken' => $broken + ); + + continue; + } + + // Link to a file on this site. + $matches = array(); + if(preg_match('/\[file_link(?:\s*|%20|,)?id=([0-9]+)\]/i', $href, $matches)) { + $results[] = array( + 'Type' => 'file', + 'Target' => $matches[1], + 'Anchor' => null, + 'DOMReference' => $link, + 'Broken' => !DataObject::get_by_id('File', $matches[1]) + ); + + continue; + } + + // Local anchor. + $matches = array(); + if(preg_match('/^#(.*)/i', $href, $matches)) { + $results[] = array( + 'Type' => 'localanchor', + 'Target' => null, + 'Anchor' => $matches[1], + 'DOMReference' => $link, + 'Broken' => !preg_match("#(name|id)=\"{$matches[1]}\"#", $htmlValue->getContent()) + ); + + continue; + } + + } + + return $results; + } + +} diff --git a/tests/model/SiteTreeBrokenLinksTest.php b/tests/model/SiteTreeBrokenLinksTest.php index 08dbda41..90fb9144 100644 --- a/tests/model/SiteTreeBrokenLinksTest.php +++ b/tests/model/SiteTreeBrokenLinksTest.php @@ -5,7 +5,7 @@ */ class SiteTreeBrokenLinksTest extends SapphireTest { protected static $fixture_file = 'SiteTreeBrokenLinksTest.yml'; - + public function testBrokenLinksBetweenPages() { $obj = $this->objFromFixture('Page','content'); @@ -17,31 +17,44 @@ class SiteTreeBrokenLinksTest extends SapphireTest { $obj->syncLinkTracking(); $this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link'); } - + + public function testBrokenAnchorBetweenPages() { + $obj = $this->objFromFixture('Page','content'); + $target = $this->objFromFixture('Page', 'about'); + + $obj->Content = "ID}]#no-anchor-here\">this is a broken link"; + $obj->syncLinkTracking(); + $this->assertTrue($obj->HasBrokenLink, 'Page has a broken link'); + + $obj->Content = "ID}]#yes-anchor-here\">this is not a broken link"; + $obj->syncLinkTracking(); + $this->assertFalse($obj->HasBrokenLink, 'Page does NOT have a broken link'); + } + public function testBrokenVirtualPages() { $obj = $this->objFromFixture('Page','content'); $vp = new VirtualPage(); - $vp->CopyContentFromID = $obj->ID; + $vp->CopyContentFromID = $obj->ID; $vp->syncLinkTracking(); $this->assertFalse($vp->HasBrokenLink, 'Working virtual page is NOT marked as broken'); - $vp->CopyContentFromID = 12345678; + $vp->CopyContentFromID = 12345678; $vp->syncLinkTracking(); $this->assertTrue($vp->HasBrokenLink, 'Broken virtual page IS marked as such'); } - + public function testBrokenInternalRedirectorPages() { $obj = $this->objFromFixture('Page','content'); $rp = new RedirectorPage(); $rp->RedirectionType = 'Internal'; - $rp->LinkToID = $obj->ID; + $rp->LinkToID = $obj->ID; $rp->syncLinkTracking(); $this->assertFalse($rp->HasBrokenLink, 'Working redirector page is NOT marked as broken'); - $rp->LinkToID = 12345678; + $rp->LinkToID = 12345678; $rp->syncLinkTracking(); $this->assertTrue($rp->HasBrokenLink, 'Broken redirector page IS marked as such'); } @@ -77,7 +90,8 @@ class SiteTreeBrokenLinksTest extends SapphireTest { $liveObj = Versioned::get_one_by_stage("SiteTree", "Live", "\"SiteTree\".\"ID\" = $obj->ID"); $this->assertEquals(1, $liveObj->HasBrokenFile); - } + } + public function testDeletingMarksBackLinkedPagesAsBroken() { $this->logInWithPermission('ADMIN'); @@ -142,7 +156,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest { WHERE \"ID\" = $linkSrc->ID")->value()); } - public function testRestoreFixesBrokenLinks() { // Create page and virtual page $p = new Page(); @@ -300,5 +313,6 @@ class SiteTreeBrokenLinksTest extends SapphireTest { $obj->syncLinkTracking(); $this->assertFalse($obj->HasBrokenLink, 'Page doesn\'t have a broken anchor or skiplink'); } + } diff --git a/tests/model/SiteTreeLinkTrackingTest.php b/tests/model/SiteTreeLinkTrackingTest.php new file mode 100644 index 00000000..7fc43f9f --- /dev/null +++ b/tests/model/SiteTreeLinkTrackingTest.php @@ -0,0 +1,68 @@ +create('HTMLValue', $content); + $links = $parser->process($htmlValue); + + if (empty($links[0])) return false; + return $links[0]['Broken']; + } + + function testParser() { + $this->assertTrue($this->isBroken('link')); + $this->assertTrue($this->isBroken('link')); + $this->assertTrue($this->isBroken('link')); + $this->assertTrue($this->isBroken('link')); + $this->assertTrue($this->isBroken('link')); + + $this->assertFalse($this->isBroken('anchor')); + $this->assertFalse($this->isBroken('anchor')); + + $page = new Page(); + $page->Content = 'nameid'; + $page->write(); + + $file = new File(); + $file->write(); + + $this->assertFalse($this->isBroken("ID]\">link")); + $this->assertFalse($this->isBroken("ID]#yes-name-anchor\">link")); + $this->assertFalse($this->isBroken("ID]#yes-id-anchor\">link")); + $this->assertFalse($this->isBroken("ID]\">link")); + } + + function highlight($content) { + $field = new SiteTreeLinkTrackingTest_Field('Test'); + $field->setValue($content); + $newContent = html_entity_decode($field->Field(), ENT_COMPAT, 'UTF-8'); + return $newContent; + } + + function testHighlighter() { + $content = $this->highlight('link'); + $this->assertEquals(substr_count($content, 'ss-broken'), 1, 'A ss-broken class is added to the broken link.'); + $this->assertEquals(substr_count($content, 'existing-class'), 1, 'Existing class is not removed.'); + + $content = $this->highlight('link'); + $this->assertEquals(substr_count($content, 'ss-broken'), 1, 'ss-broken class is added to the broken link.'); + + $page = new Page(); + $page->Content = ''; + $page->write(); + + $content = $this->highlight( + "ID]\" class=\"existing-class ss-broken ss-broken\">link" + ); + $this->assertEquals(substr_count($content, 'ss-broken'), 0, 'All ss-broken classes are removed from good link'); + $this->assertEquals(substr_count($content, 'existing-class'), 1, 'Existing class is not removed.'); + } +} + +class SiteTreeLinkTrackingTest_Field extends HtmlEditorField implements TestOnly { + private static $extensions = array( + 'SiteTreeLinkTracking_Highlighter' + ); +}