FEATURE: Try to tidy HTML using external libraries if available (from r97017)

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@102419 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
Ingo Schommer 2010-04-12 03:31:19 +00:00
parent 2179389da3
commit 5e4210755b

View File

@ -29,6 +29,7 @@ class SS_HTMLValue extends ViewableData {
* @return string
*/
public function getContent() {
$content = $this->cleanContent();
// strip the body tags from the output (which are automatically added by DOMDocument)
return preg_replace (
array (
@ -36,7 +37,7 @@ class SS_HTMLValue extends ViewableData {
'/<\/body[^>]*>\s*$/i'
),
null,
$this->getDocument()->saveXML($this->getDocument()->documentElement->lastChild)
$content
);
}
@ -51,6 +52,43 @@ class SS_HTMLValue extends ViewableData {
);
}
/**
 * Attempt to clean invalid HTML, which messes up diffs.
 * This checks for various methods and cleans code if possible.
 *
 * NB: By default, only extremely simple tidying is performed,
 * by passing through DomDocument::loadHTML and saveXML.
 * For more thorough cleaning you will either need to install the php_tidy module
 * (see: http://www.php.net/manual/en/tidy.installation.php)
 * or else install the SilverStripe module for HTMLPurifier from:
 * http://svn.silverstripe.com/open/modules/htmlpurifier/trunk
 * (see also: http://htmlpurifier.org).
 *
 * Tidy takes precedence over HTMLPurifier when both are available.
 * If Tidy is available but fails to parse the markup, the basic
 * DOMDocument output is returned unchanged.
 *
 * @return string The cleaned markup
 */
protected function cleanContent() {
	$doc = $this->getDocument();

	// At the most basic level of cleaning, use DOMDocument to save valid XML.
	$content = $doc->saveXML($doc->documentElement->lastChild);

	if (class_exists('Tidy')) {
		// Check for the Tidy class, provided by php-tidy
		$tidy = tidy_parse_string($content,
			array(
				'clean' => true,
				'output-xhtml' => true,
				'show-body-only' => true,
				'wrap' => 0,
				'input-encoding' => 'utf8',
				'output-encoding' => 'utf8'
			));

		// tidy_parse_string() returns false on failure; calling cleanRepair()
		// on that would be a fatal error, so keep the DOMDocument output instead.
		if ($tidy instanceof tidy) {
			$tidy->cleanRepair();
			// Casting the tidy object to a string yields the repaired markup.
			$content = (string) $tidy;
		}
	} else if (class_exists('HTMLPurifier')) {
		// Look otherwise for HTMLPurifier, provided by module.
		$html = new HTMLPurifier();
		$content = $html->purify($content);
	}

	return $content;
}
/**
* @return DOMDocument
*/