<?php

namespace SilverStripe\View\Parsers;

use InvalidArgumentException;
use SilverStripe\Core\Convert;
use SilverStripe\Core\Injector\Injector;

require_once 'difflib/difflib.php';

/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * Extends the global \Diff class loaded from difflib and adds HTML-aware
 * helpers that tokenise markup, diff it, and wrap the differences in
 * <ins> / <del> tags.
 */
class Diff extends \Diff
{
    /**
     * Name of the class used to clean HTML when no cleaner instance is passed
     * to {@link cleanHTML()}. Ignored when null or when the class does not exist.
     *
     * @var string|null
     */
    public static $html_cleaner_class = null;

    /**
     * Attempt to clean invalid HTML, which messes up diffs.
     * This cleans code if possible, using an instance of HTMLCleaner.
     *
     * NB: By default, only extremely simple tidying is performed,
     * by passing through DomDocument::loadHTML and saveXML.
     *
     * @param string $content HTML content
     * @param HTMLCleaner $cleaner Optional instance of a HTMLCleaner class to
     *    use, overriding self::$html_cleaner_class
     * @return string
     */
    public static function cleanHTML($content, $cleaner = null)
    {
        if (!$cleaner) {
            if (self::$html_cleaner_class && class_exists(self::$html_cleaner_class)) {
                $cleaner = Injector::inst()->create(self::$html_cleaner_class);
            } else {
                // Load the default cleaner if the dependent class is available.
                // NOTE(review): presumably inst() returns null when no backend
                // is installed, which is what the fallback below relies on.
                $cleaner = HTMLCleaner::inst();
            }
        }

        if ($cleaner) {
            $content = $cleaner->cleanHTML($content);
        } else {
            // At most basic level of cleaning, use DOMDocument to save valid XML.
            $doc = Injector::inst()->create('HTMLValue', $content);
            $content = $doc->getContent();
        }

        // Remove empty <ins /> and <del /> tags because browsers hate them
        $content = preg_replace('/<(ins|del)[^>]*\/>/', '', $content);

        return $content;
    }

    /**
     * Produce marked-up HTML describing the difference between two HTML fragments.
     *
     * Additions are wrapped in <ins>, removals in <del>; a change emits the
     * <ins> followed by the <del>. The assembled markup is passed through
     * {@link cleanHTML()} before being returned.
     *
     * @param string $from Original content
     * @param string $to New content
     * @param bool $escape When true, chunk text is run through Convert::raw2xml()
     *    before being assembled
     * @return string
     */
    public static function compareHTML($from, $to, $escape = false)
    {
        // First split up the content into words and tags
        $set1 = self::getHTMLChunks($from);
        $set2 = self::getHTMLChunks($to);

        // Diff that
        $diff = new Diff($set1, $set2);

        // Index 1 tracks the "from" side, index 2 the "to" side.
        $tagStack = [1 => 0, 2 => 0];
        $rechunked = [1 => [], 2 => []];

        // Go through everything, converting edited tags (and their content) into single chunks.
        // Otherwise the generated HTML gets crusty.
        foreach ($diff->edits as $edit) {
            $lookForTag = false;
            $stuffFor = [];

            switch ($edit->type) {
                case 'copy':
                    $lookForTag = false;
                    $stuffFor[1] = $edit->orig;
                    $stuffFor[2] = $edit->orig;
                    break;

                case 'change':
                    $lookForTag = true;
                    $stuffFor[1] = $edit->orig;
                    $stuffFor[2] = $edit->final;
                    break;

                case 'add':
                    $lookForTag = true;
                    $stuffFor[1] = null;
                    $stuffFor[2] = $edit->final;
                    break;

                case 'delete':
                    $lookForTag = true;
                    $stuffFor[1] = $edit->orig;
                    $stuffFor[2] = null;
                    break;
            }

            foreach ($stuffFor as $listName => $chunks) {
                if (!$chunks) {
                    continue;
                }

                foreach ($chunks as $item) {
                    // $tagStack > 0 indicates that we should be tag-building:
                    // glue this item onto the chunk currently being built.
                    if ($tagStack[$listName]) {
                        $rechunked[$listName][count($rechunked[$listName]) - 1] .= ' ' . $item;
                    } else {
                        $rechunked[$listName][] = $item;
                    }

                    $isTag = isset($item[0]) && $item[0] === '<';
                    $isClosingTag = substr($item, 0, 2) === '</';

                    if ($lookForTag && !$tagStack[$listName] && $isTag && !$isClosingTag) {
                        // An edited opening tag starts a tag-building run.
                        $tagStack[$listName] = 1;
                    } elseif ($tagStack[$listName]) {
                        // Track nesting depth until the run's tag is closed.
                        // Note: every item starting with "<" that is not "</..."
                        // counts as an opener, including self-closing tags.
                        if ($isClosingTag) {
                            $tagStack[$listName]--;
                        } elseif ($isTag) {
                            $tagStack[$listName]++;
                        }
                    }
                }
            }
        }

        // Diff the re-chunked data, turning it into marked-up HTML
        $diff = new Diff($rechunked[1], $rechunked[2]);
        $content = '';
        foreach ($diff->edits as $edit) {
            $orig = ($escape) ? Convert::raw2xml($edit->orig) : $edit->orig;
            $final = ($escape) ? Convert::raw2xml($edit->final) : $edit->final;

            switch ($edit->type) {
                case 'copy':
                    $content .= " " . implode(" ", $orig) . " ";
                    break;

                case 'change':
                    $content .= " <ins>" . implode(" ", $final) . "</ins> ";
                    $content .= " <del>" . implode(" ", $orig) . "</del> ";
                    break;

                case 'add':
                    $content .= " <ins>" . implode(" ", $final) . "</ins> ";
                    break;

                case 'delete':
                    $content .= " <del>" . implode(" ", $orig) . "</del> ";
                    break;
            }
        }

        return self::cleanHTML($content);
    }

    /**
     * Split HTML content into a flat list of words and complete tags.
     *
     * Whitespace separates chunks; a tag together with its attributes
     * (everything from "<" up to the chunk ending in ">") is kept as one chunk.
     * Because padding is inserted around "<" and ">", the result may contain
     * empty-string chunks at the start, end, or between adjacent tags.
     * A tag that is never closed with ">" is emitted as a single chunk running
     * to the end of the input.
     *
     * @param string|array $content If passed as an array, values will be concatenated with a comma.
     * @return array
     * @throws InvalidArgumentException If $content is a non-empty value that is
     *    not a string, array or number
     */
    public static function getHTMLChunks($content)
    {
        if ($content && !is_string($content) && !is_array($content) && !is_numeric($content)) {
            throw new InvalidArgumentException('$content parameter needs to be a string or array');
        }
        if (is_array($content)) {
            $content = implode(',', $content);
        }
        // Normalise null / numeric input so the string functions below always get a string.
        $content = (string) $content;

        // Treat non-breaking spaces (entity or literal UTF-8 character) as ordinary
        // spaces, and pad angle brackets so tags split away from adjacent words.
        $content = str_replace(
            ['&nbsp;', "\xC2\xA0", '<', '>'],
            [' ', ' ', ' <', '> '],
            $content
        );
        $candidateChunks = preg_split("/[\t\r\n ]+/", $content);

        $chunks = [];
        $total = count($candidateChunks);
        for ($i = 0; $i < $total; $i++) {
            $item = $candidateChunks[$i];

            if (isset($item[0]) && $item[0] === '<') {
                // Re-join the pieces of a tag that was split on the whitespace
                // between its attributes.
                $newChunk = $item;
                while (substr($item, -1) !== '>') {
                    if (++$i >= $total) {
                        // Unterminated tag: stop at end of input instead of looping forever.
                        break;
                    }
                    $item = $candidateChunks[$i];
                    $newChunk .= ' ' . $item;
                }
                $chunks[] = $newChunk;
            } else {
                $chunks[] = $item;
            }
        }

        return $chunks;
    }
}