2016-08-16 03:22:58 +02:00
|
|
|
<?php
|
|
|
|
namespace SilverStripe\CMS\Model;
|
|
|
|
|
2017-10-30 02:38:31 +01:00
|
|
|
use DOMElement;
|
2016-08-23 04:36:06 +02:00
|
|
|
use SilverStripe\Control\Director;
|
2016-08-16 03:22:58 +02:00
|
|
|
use SilverStripe\ORM\DataObject;
|
2016-09-09 01:26:24 +02:00
|
|
|
use SilverStripe\View\Parsers\HTMLValue;
|
2016-08-16 03:22:58 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* A helper object for extracting information about links.
|
|
|
|
*/
|
|
|
|
class SiteTreeLinkTracking_Parser
{
    /**
     * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
     * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
     *
     * Only <a> elements carrying an href attribute are inspected. A link that matches none of the recognised
     * forms below (empty or root-relative href, [sitetree_link] shortcode, local "#anchor") is left out of the
     * result entirely.
     *
     * @param HTMLValue $htmlValue Object to parse the links from.
     * @return array List of associative arrays, one per recognised link, with the following field layout:
     *  Type: string, name of the link type ('broken', 'sitetree' or 'localanchor')
     *  Target: string|null, the page ID captured from the shortcode for 'sitetree' links, null otherwise
     *  Anchor: string|null, anchor part of the link (without the leading '#'), null when absent
     *  DOMReference: DOMElement, reference to the link to apply changes.
     *  Broken: boolean, a flag highlighting whether the link should be treated as broken.
     */
    public function process(HTMLValue $htmlValue)
    {
        $results = [];

        $links = $htmlValue->getElementsByTagName('a');
        if (!$links) {
            return $results;
        }

        /** @var DOMElement $link */
        foreach ($links as $link) {
            if (!$link->hasAttribute('href')) {
                continue;
            }

            $href = $link->getAttribute('href');
            if (Director::is_site_url($href)) {
                $href = Director::makeRelative($href);
            }
            // Normalise to a string so the comparisons and regex matches below never receive
            // a non-string value.
            $href = (string) $href;

            // Definitely broken links: nothing to follow, or a root-relative URL left over
            // after the site-relative rewrite above.
            if ($href === '' || $href[0] === '/') {
                $results[] = [
                    'Type' => 'broken',
                    'Target' => null,
                    'Anchor' => null,
                    'DOMReference' => $link,
                    'Broken' => true
                ];

                continue;
            }

            // Link to a page on this site, written as a [sitetree_link,id=N] shortcode
            // optionally followed by "#anchor".
            $matches = [];
            if (preg_match('/\[sitetree_link(?:\s*|%20|,)?id=(?<id>[0-9]+)\](#(?<anchor>.*))?/i', $href, $matches)) {
                // Check if page link is broken
                /** @var SiteTree $page */
                $page = DataObject::get_by_id(SiteTree::class, $matches['id']);
                if (!$page) {
                    // Page doesn't exist.
                    $broken = true;
                } elseif (!empty($matches['anchor'])) {
                    // Ensure anchor isn't broken on target page.
                    // Content may be null for a page without body text; cast so preg_match()
                    // always gets a string subject (null is deprecated there since PHP 8.1).
                    $anchor = preg_quote($matches['anchor'], '/');
                    $broken = !preg_match("/(name|id)=\"{$anchor}\"/", (string) $page->Content);
                } else {
                    $broken = false;
                }

                $results[] = [
                    'Type' => 'sitetree',
                    'Target' => $matches['id'],
                    'Anchor' => empty($matches['anchor']) ? null : $matches['anchor'],
                    'DOMReference' => $link,
                    'Broken' => $broken
                ];

                continue;
            }

            // Local anchor: must be backed by a name="..." or id="..." attribute somewhere
            // in the content being parsed.
            if (preg_match('/^#(.*)/i', $href, $matches)) {
                $anchor = preg_quote($matches[1], '#');
                $results[] = [
                    'Type' => 'localanchor',
                    'Target' => null,
                    'Anchor' => $matches[1],
                    'DOMReference' => $link,
                    // Cast guards against a null content value reaching preg_match().
                    'Broken' => !preg_match("#(name|id)=\"{$anchor}\"#", (string) $htmlValue->getContent())
                ];
            }
        }

        return $results;
    }
}
|