FEATURE: Try to tidy HTML using external libraries if available

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/branches/2.4@97017 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
Luke Hudson 2010-01-15 02:57:18 +00:00 committed by Sam Minnee
parent 1125de1b99
commit 6bfd9cc24c

View File

@ -29,6 +29,7 @@ class SS_HTMLValue extends ViewableData {
* @return string * @return string
*/ */
public function getContent() { public function getContent() {
$content = $this->cleanContent();
// strip the body tags from the output (which are automatically added by DOMDocument) // strip the body tags from the output (which are automatically added by DOMDocument)
return preg_replace ( return preg_replace (
array ( array (
@ -36,7 +37,7 @@ class SS_HTMLValue extends ViewableData {
'/<\/body[^>]*>\s*$/i' '/<\/body[^>]*>\s*$/i'
), ),
null, null,
$this->getDocument()->saveXML($this->getDocument()->documentElement->lastChild) $content
); );
} }
@ -50,7 +51,44 @@ class SS_HTMLValue extends ViewableData {
"<body>$content</body></html>" "<body>$content</body></html>"
); );
} }
/**
 * Attempt to clean invalid HTML, which messes up diffs.
 * This checks which cleaning methods are available and uses the first one found.
 *
 * NB: By default, only extremely simple tidying is performed,
 * by passing the content through DOMDocument::loadHTML and saveXML.
 * For more thorough cleaning you will need to either install the php_tidy module
 * (see: http://www.php.net/manual/en/tidy.installation.php)
 * or else install the SilverStripe module for HTMLPurifier from:
 * http://svn.silverstripe.com/open/modules/htmlpurifier/trunk
 * See also: http://htmlpurifier.org
 *
 * @return string Cleaned markup, serialised from the last child of the document element
 */
protected function cleanContent() {
	$doc = $this->getDocument();
	// At the most basic level of cleaning, use DOMDocument to save valid XML.
	$content = $doc->saveXML($doc->documentElement->lastChild);
	if (class_exists('Tidy')) {
		// Check for the Tidy class, provided by php-tidy
		$tidy = tidy_parse_string(
			$content,
			array(
				'clean' => true,
				'output-xhtml' => true,
				'show-body-only' => true,
				'wrap' => 0,
				'input-encoding' => 'utf8',
				'output-encoding' => 'utf8'
			)
		);
		// tidy_parse_string() returns false on failure. In that case keep the
		// basic DOMDocument output instead of calling a method on a non-object.
		if (is_object($tidy)) {
			$tidy->cleanRepair();
			// Casting the tidy object to a string yields the repaired markup.
			$content = (string) $tidy;
		}
	} else if (class_exists('HTMLPurifier')) {
		// Look otherwise for HTMLPurifier, provided by module.
		$purifier = new HTMLPurifier();
		$content = $purifier->purify($content);
	}
	return $content;
}
/** /**
* @return DOMDocument * @return DOMDocument
*/ */