From 5e4210755b9cd2dda9385b7174544ddd0a4a1f2c Mon Sep 17 00:00:00 2001
From: Ingo Schommer
Date: Mon, 12 Apr 2010 03:31:19 +0000
Subject: [PATCH] FEATURE: Try to tidy HTML using external libraries if available (from r97017)

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@102419 467b73ca-7a2a-4603-9d3b-597d59a354a9
---
 integration/HTMLValue.php | 48 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/integration/HTMLValue.php b/integration/HTMLValue.php
index ee38a6535..3ec0a70f8 100755
--- a/integration/HTMLValue.php
+++ b/integration/HTMLValue.php
@@ -29,6 +29,7 @@ class SS_HTMLValue extends ViewableData {
 	 * @return string
 	 */
 	public function getContent() {
+		$content = $this->cleanContent();
 		// strip the body tags from the output (which are automatically added by DOMDocument)
 		return preg_replace (
 			array (
@@ -36,7 +37,7 @@ class SS_HTMLValue extends ViewableData {
 				'/<\/body[^>]*>\s*$/i'
 			),
 			null,
-			$this->getDocument()->saveXML($this->getDocument()->documentElement->lastChild)
+			$content
 		);
 	}
 	
@@ -50,7 +51,50 @@ class SS_HTMLValue extends ViewableData {
 			"$content"
 		);
 	}
-	
+
+	/**
+	 * Attempt to clean invalid HTML, which messes up diffs.
+	 * This checks for various methods and cleans code if possible.
+	 *
+	 * NB: By default, only extremely simple tidying is performed,
+	 * by passing through DOMDocument::loadHTML and saveXML.
+	 * For more thorough cleaning, either install the php_tidy module
+	 * See: http://www.php.net/manual/en/tidy.installation.php
+	 * or else install the SilverStripe module for HTMLPurifier from:
+	 * http://svn.silverstripe.com/open/modules/htmlpurifier/trunk
+	 * See also: http://htmlpurifier.org
+	 *
+	 * @return string Cleaned markup; getContent() strips any enclosing body tags from it.
+	 */
+	protected function cleanContent() {
+		$doc = $this->getDocument();
+		// At most basic level of cleaning, use DOMDocument to save valid XML.
+		$content = $doc->saveXML($doc->documentElement->lastChild);
+		if (class_exists('Tidy')) {
+			// Check for the Tidy class, provided by php-tidy
+			$tidy = tidy_parse_string($content,
+				array(
+					'clean' => true,
+					'output-xhtml' => true,
+					'show-body-only' => true,
+					'wrap' => 0,
+					'input-encoding' => 'utf8',
+					'output-encoding' => 'utf8'
+				));
+			// tidy_parse_string() returns false on failure; keep the DOMDocument
+			// output in that case instead of calling a method on a non-object.
+			if ($tidy instanceof tidy) {
+				$tidy->cleanRepair();
+				$content = '' . $tidy;
+			}
+		} else if (class_exists('HTMLPurifier')) {
+			// Look otherwise for HTMLPurifier, provided by module.
+			$html = new HTMLPurifier();
+			$content = $html->purify($content);
+		}
+		return $content;
+	}
+
 	/**
 	 * @return DOMDocument
 	 */