create(self::$html_cleaner_class); } else { //load cleaner if the dependent class is available $cleaner = HTMLCleaner::inst(); } } if ($cleaner) { $content = $cleaner->cleanHTML($content); } else { // At most basic level of cleaning, use DOMDocument to save valid XML. $doc = HTMLValue::create($content); $content = $doc->getContent(); } // Remove empty and tags because browsers hate them $content = preg_replace('/<(ins|del)[^>]*\/>/', '', $content ?? ''); return $content; } /** * @param string $from * @param string $to * @param bool $escape * @return string * * @deprecated 4.13.0 Will be replaced with SilverStripe\View\Parsers\HtmlDiff::compareHTML() */ public static function compareHTML($from, $to, $escape = false) { Deprecation::notice('4.13.0', 'Will be replaced with SilverStripe\View\Parsers\HtmlDiff::compareHTML()'); // First split up the content into words and tags $set1 = self::getHTMLChunks($from); $set2 = self::getHTMLChunks($to); // Diff that $diff = new Diff($set1, $set2); $tagStack[1] = $tagStack[2] = 0; $rechunked[1] = $rechunked[2] = []; // Go through everything, converting edited tags (and their content) into single chunks. Otherwise // the generated HTML gets crusty foreach ($diff->edits as $edit) { $lookForTag = false; $stuffFor = []; switch ($edit->type) { case 'copy': $lookForTag = false; $stuffFor[1] = $edit->orig; $stuffFor[2] = $edit->orig; break; case 'change': $lookForTag = true; $stuffFor[1] = $edit->orig; $stuffFor[2] = $edit->final; break; case 'add': $lookForTag = true; $stuffFor[1] = null; $stuffFor[2] = $edit->final; break; case 'delete': $lookForTag = true; $stuffFor[1] = $edit->orig; $stuffFor[2] = null; break; } foreach ($stuffFor as $listName => $chunks) { if ($chunks) { foreach ($chunks as $item) { // $tagStack > 0 indicates that we should be tag-building if ($tagStack[$listName]) { $rechunked[$listName][sizeof($rechunked[$listName])-1] .= ' ' . 
/**
 * Split content into whitespace-delimited chunks, keeping every HTML tag (together with
 * any whitespace-separated attributes inside it) as one single chunk.
 *
 * Empty strings produced by leading/trailing whitespace are kept in the result, exactly
 * as the splitter returns them.
 *
 * @param string|bool|array $content If passed as an array, values will be concatenated with a comma.
 * @return array List of string chunks: plain words and complete (or unterminated) tags.
 * @throws InvalidArgumentException If $content is truthy but not a string, array, number or boolean.
 *
 * @deprecated 4.13.0 Will be removed without equivalent functionality
 */
public static function getHTMLChunks($content)
{
    Deprecation::notice('4.13.0', 'Will be removed without equivalent functionality');

    if ($content && !is_string($content) && !is_array($content) && !is_numeric($content) && !is_bool($content)) {
        throw new InvalidArgumentException('$content parameter needs to be a string or array');
    }
    if (is_bool($content)) {
        // Convert boolean to strings
        $content = $content ? "true" : "false";
    }
    if (is_array($content)) {
        // Non-scalar entries are dropped before the values are joined
        $content = array_filter($content, 'is_scalar');
        // Convert array to CSV
        $content = implode(',', $content);
    }

    // Pad "<" and ">" with spaces so that tags always start and end on a chunk boundary.
    // NOTE(review): the first search term is a plain space here, which makes that pair a
    // no-op; it presumably was an HTML entity such as a non-breaking space before this
    // text was extracted - confirm against the upstream source.
    $content = str_replace([" ", "<", ">"], [" ", " <", "> "], (string) ($content ?? ''));

    $candidateChunks = preg_split("/[\t\r\n ]+/", $content);
    if ($candidateChunks === false) {
        // preg_split() reports failure with false; treat that as "no chunks"
        $candidateChunks = [];
    }
    $total = count($candidateChunks);

    $chunks = [];
    for ($i = 0; $i < $total; $i++) {
        $item = $candidateChunks[$i];

        if (isset($item[0]) && $item[0] === '<') {
            // Start of a tag: keep appending candidates until one ends with ">"
            $newChunk = $item;

            // substr() is used instead of $item[strlen($item) - 1] because a candidate can be
            // an empty string (an unterminated tag followed by trailing whitespace), and
            // reading offset -1 of an empty string raises "Uninitialized string offset".
            while (substr($item, -1) !== '>') {
                if (++$i >= $total) {
                    // Ran out of input: emit the unterminated tag as-is
                    break;
                }
                $item = $candidateChunks[$i];
                $newChunk .= ' ' . $item;
            }
            $chunks[] = $newChunk;
        } else {
            $chunks[] = $item;
        }
    }

    return $chunks;
}