mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 12:05:37 +00:00
195 lines
5.1 KiB
PHP
195 lines
5.1 KiB
PHP
|
<?php
|
||
|
|
||
|
namespace SilverStripe\View\Parsers;
|
||
|
|
||
|
|
||
|
use InvalidArgumentException;
|
||
|
use SilverStripe\Core\Convert;
|
||
|
use SilverStripe\Core\Injector\Injector;
|
||
|
|
||
|
require_once 'difflib/difflib.php';
|
||
|
|
||
|
/**
|
||
|
* Class representing a 'diff' between two sequences of strings.
|
||
|
*/
|
||
|
class Diff extends \Diff
|
||
|
{
|
||
|
public static $html_cleaner_class = null;
|
||
|
|
||
|
/**
|
||
|
* Attempt to clean invalid HTML, which messes up diffs.
|
||
|
* This cleans code if possible, using an instance of HTMLCleaner
|
||
|
*
|
||
|
* NB: By default, only extremely simple tidying is performed,
|
||
|
* by passing through DomDocument::loadHTML and saveXML
|
||
|
*
|
||
|
* @param string $content HTML content
|
||
|
* @param HTMLCleaner $cleaner Optional instance of a HTMLCleaner class to
|
||
|
* use, overriding self::$html_cleaner_class
|
||
|
* @return mixed|string
|
||
|
*/
|
||
|
public static function cleanHTML($content, $cleaner = null) {
|
||
|
if (!$cleaner) {
|
||
|
if (self::$html_cleaner_class && class_exists(self::$html_cleaner_class)) {
|
||
|
$cleaner = Injector::inst()->create(self::$html_cleaner_class);
|
||
|
} else {
|
||
|
//load cleaner if the dependent class is available
|
||
|
$cleaner = HTMLCleaner::inst();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ($cleaner) {
|
||
|
$content = $cleaner->cleanHTML($content);
|
||
|
} else {
|
||
|
// At most basic level of cleaning, use DOMDocument to save valid XML.
|
||
|
$doc = Injector::inst()->create('HTMLValue', $content);
|
||
|
$content = $doc->getContent();
|
||
|
}
|
||
|
|
||
|
// Remove empty <ins /> and <del /> tags because browsers hate them
|
||
|
$content = preg_replace('/<(ins|del)[^>]*\/>/','', $content);
|
||
|
|
||
|
return $content;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @param string $from
|
||
|
* @param string $to
|
||
|
* @param bool $escape
|
||
|
* @return string
|
||
|
*/
|
||
|
public static function compareHTML($from, $to, $escape = false) {
|
||
|
// First split up the content into words and tags
|
||
|
$set1 = self::getHTMLChunks($from);
|
||
|
$set2 = self::getHTMLChunks($to);
|
||
|
|
||
|
// Diff that
|
||
|
$diff = new Diff($set1, $set2);
|
||
|
|
||
|
$tagStack[1] = $tagStack[2] = 0;
|
||
|
$rechunked[1] = $rechunked[2] = array();
|
||
|
|
||
|
// Go through everything, converting edited tags (and their content) into single chunks. Otherwise
|
||
|
// the generated HTML gets crusty
|
||
|
foreach($diff->edits as $edit) {
|
||
|
$lookForTag = false;
|
||
|
$stuffFor = [];
|
||
|
switch($edit->type) {
|
||
|
case 'copy':
|
||
|
$lookForTag = false;
|
||
|
$stuffFor[1] = $edit->orig;
|
||
|
$stuffFor[2] = $edit->orig;
|
||
|
break;
|
||
|
|
||
|
case 'change':
|
||
|
$lookForTag = true;
|
||
|
$stuffFor[1] = $edit->orig;
|
||
|
$stuffFor[2] = $edit->final;
|
||
|
break;
|
||
|
|
||
|
case 'add':
|
||
|
$lookForTag = true;
|
||
|
$stuffFor[1] = null;
|
||
|
$stuffFor[2] = $edit->final;
|
||
|
break;
|
||
|
|
||
|
case 'delete':
|
||
|
$lookForTag = true;
|
||
|
$stuffFor[1] = $edit->orig;
|
||
|
$stuffFor[2] = null;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
foreach($stuffFor as $listName => $chunks) {
|
||
|
if($chunks) {
|
||
|
foreach($chunks as $item) {
|
||
|
// $tagStack > 0 indicates that we should be tag-building
|
||
|
if ($tagStack[$listName]) {
|
||
|
$rechunked[$listName][sizeof($rechunked[$listName])-1] .= ' ' . $item;
|
||
|
} else {
|
||
|
$rechunked[$listName][] = $item;
|
||
|
}
|
||
|
|
||
|
if ($lookForTag
|
||
|
&& !$tagStack[$listName]
|
||
|
&& isset($item[0])
|
||
|
&& $item[0] == "<"
|
||
|
&& substr($item,0,2) != "</"
|
||
|
) {
|
||
|
$tagStack[$listName] = 1;
|
||
|
} else if($tagStack[$listName]) {
|
||
|
if(substr($item,0,2) == "</") {
|
||
|
$tagStack[$listName]--;
|
||
|
} else if(isset($item[0]) && $item[0] == "<") {
|
||
|
$tagStack[$listName]++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Diff the re-chunked data, turning it into maked up HTML
|
||
|
$diff = new Diff($rechunked[1], $rechunked[2]);
|
||
|
$content = '';
|
||
|
foreach($diff->edits as $edit) {
|
||
|
$orig = ($escape) ? Convert::raw2xml($edit->orig) : $edit->orig;
|
||
|
$final = ($escape) ? Convert::raw2xml($edit->final) : $edit->final;
|
||
|
|
||
|
switch($edit->type) {
|
||
|
case 'copy':
|
||
|
$content .= " " . implode(" ", $orig) . " ";
|
||
|
break;
|
||
|
|
||
|
case 'change':
|
||
|
$content .= " <ins>" . implode(" ", $final) . "</ins> ";
|
||
|
$content .= " <del>" . implode(" ", $orig) . "</del> ";
|
||
|
break;
|
||
|
|
||
|
case 'add':
|
||
|
$content .= " <ins>" . implode(" ", $final) . "</ins> ";
|
||
|
break;
|
||
|
|
||
|
case 'delete':
|
||
|
$content .= " <del>" . implode(" ", $orig) . "</del> ";
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return self::cleanHTML($content);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @param string|array $content If passed as an array, values will be concatenated with a comma.
|
||
|
* @return array
|
||
|
*/
|
||
|
public static function getHTMLChunks($content) {
|
||
|
if($content && !is_string($content) && !is_array($content) && !is_numeric($content)) {
|
||
|
throw new InvalidArgumentException('$content parameter needs to be a string or array');
|
||
|
}
|
||
|
if(is_array($content)) {
|
||
|
$content = implode(',', $content);
|
||
|
}
|
||
|
|
||
|
$content = str_replace(array(" ", "<", ">"), array(" "," <", "> "), $content);
|
||
|
$candidateChunks = preg_split("/[\t\r\n ]+/", $content);
|
||
|
$chunks = [];
|
||
|
while($chunk = each($candidateChunks)) {
|
||
|
$item = $chunk['value'];
|
||
|
if(isset($item[0]) && $item[0] == "<") {
|
||
|
$newChunk = $item;
|
||
|
while($item[strlen($item)-1] != ">") {
|
||
|
$chunk = each($candidateChunks);
|
||
|
$item = $chunk['value'];
|
||
|
$newChunk .= ' ' . $item;
|
||
|
}
|
||
|
$chunks[] = $newChunk;
|
||
|
} else {
|
||
|
$chunks[] = $item;
|
||
|
}
|
||
|
}
|
||
|
return $chunks;
|
||
|
}
|
||
|
|
||
|
}
|