silverstripe-cms/code/Model/SiteTreeLinkTracking_Parser.php

103 lines
3.5 KiB
PHP
Raw Permalink Normal View History

<?php
namespace SilverStripe\CMS\Model;
use DOMElement;
use SilverStripe\Control\Director;
use SilverStripe\ORM\DataObject;
use SilverStripe\View\Parsers\HTMLValue;
/**
* A helper object for extracting information about links.
*/
class SiteTreeLinkTracking_Parser
{
    /**
     * Finds the links that are of interest for the link tracking automation. Checks for brokenness and attaches
     * extracted metadata so consumers can decide what to do with the DOM element (provided as DOMReference).
     *
     * Every <a> element that has an href attribute is classified; the first matching rule wins:
     *  - 'broken':      the href (after Director::makeRelative() for site URLs) is empty or starts with "/".
     *  - 'sitetree':    the href contains a [sitetree_link,id=N] shortcode, optionally followed by "#anchor".
     *  - 'localanchor': the href starts with "#".
     * Links matching none of these rules are not included in the result.
     *
     * @param HTMLValue $htmlValue Object to parse the links from.
     * @return array List of associative arrays, one per reported link, with the following field layout:
     *  Type: string, name of the link type ('broken', 'sitetree' or 'localanchor')
     *  Target: string|null, the page ID captured from the shortcode for 'sitetree' links, null otherwise
     *  Anchor: string|null, anchor part of the link
     *  DOMReference: DOMElement, reference to the link to apply changes.
     *  Broken: boolean, a flag highlighting whether the link should be treated as broken.
     */
    public function process(HTMLValue $htmlValue)
    {
        $results = [];

        $links = $htmlValue->getElementsByTagName('a');
        if (!$links) {
            return $results;
        }

        // Serialised HTML used to validate local anchors. Nothing in this method modifies the document,
        // so it is fetched at most once, and only if a local anchor link is actually encountered.
        $content = null;

        /** @var DOMElement $link */
        foreach ($links as $link) {
            if (!$link->hasAttribute('href')) {
                continue;
            }

            $href = $link->getAttribute('href');
            if (Director::is_site_url($href)) {
                // Normalise to a string once so the checks below never have to deal with null.
                $href = (string) Director::makeRelative($href);
            }

            // Definitely broken links.
            if ($href === '' || $href[0] === '/') {
                $results[] = [
                    'Type' => 'broken',
                    'Target' => null,
                    'Anchor' => null,
                    'DOMReference' => $link,
                    'Broken' => true,
                ];
                continue;
            }

            // Link to a page on this site.
            $matches = [];
            $shortcodePattern = '/\[sitetree_link(?:\s*|%20|,)?id=(?<id>[0-9]+)\](#(?<anchor>.*))?/i';
            if (preg_match($shortcodePattern, $href, $matches)) {
                $anchor = empty($matches['anchor']) ? null : $matches['anchor'];

                // Check if page link is broken
                $page = DataObject::get_by_id(SiteTree::class, $matches['id']);
                if (!$page) {
                    // Page doesn't exist.
                    $broken = true;
                } elseif ($anchor !== null) {
                    // Ensure anchor isn't broken on target page. Compare strictly as strings: a loose
                    // comparison would treat distinct numeric-looking anchors (e.g. "1e3" and "1000") as equal.
                    $anchorsOnPage = array_map('strval', $page->getAnchorsOnPage() ?? []);
                    $broken = !in_array($anchor, $anchorsOnPage, true);
                } else {
                    $broken = false;
                }

                $results[] = [
                    'Type' => 'sitetree',
                    'Target' => $matches['id'],
                    'Anchor' => $anchor,
                    'DOMReference' => $link,
                    'Broken' => $broken,
                ];
                continue;
            }

            // Local anchor.
            if (preg_match('/^#(.*)/i', $href, $matches)) {
                if ($content === null) {
                    $content = (string) $htmlValue->getContent();
                }

                // Escape the anchor for use inside the '#'-delimited pattern below.
                $quotedAnchor = preg_quote($matches[1], '#');
                $results[] = [
                    'Type' => 'localanchor',
                    'Target' => null,
                    'Anchor' => $matches[1],
                    'DOMReference' => $link,
                    // Broken unless some element in the content declares this anchor via name="..." or id="...".
                    'Broken' => !preg_match("#(name|id)=\"{$quotedAnchor}\"#", $content),
                ];
            }
        }

        return $results;
    }
}