ENHANCEMENT Allowing custom HTMLCleaner implementations via Diff::$html_cleaner_class, and new TidyHTMLCleaner and PurifierHTMLCleaner classes

This commit is contained in:
Julian Seidenberg 2011-03-30 17:46:13 +13:00 committed by Ingo Schommer
parent 9b8594af8c
commit 341245dd4a
3 changed files with 129 additions and 0 deletions

View File

@ -677,8 +677,11 @@ class Diff
if (!$cleaner) {
if (class_exists(self::$html_cleaner_class)) {
$cleaner = new self::$html_cleaner_class;
} else {
$cleaner = HTMLCleaner::inst(); //load cleaner if the dependent class is available
}
}
if ($cleaner) {
$content = $cleaner->cleanHTML($content);
} else {

98
core/HTMLCleaner.php Normal file
View File

@ -0,0 +1,98 @@
<?php
/**
* @package sapphire
* @subpackage misc
*/
/**
* Base class for HTML cleaning implementations.
*/
abstract class HTMLCleaner extends Object {

    /**
     * Baseline settings applied when the constructor receives no overrides.
     * Subclasses may redeclare this property to ship their own defaults.
     *
     * @var array
     */
    protected $defaultConfig = array();

    /**
     * Settings currently in effect, for cleaners that support configuration (like Tidy).
     *
     * @var array
     */
    public $config;

    /**
     * Initialise the active configuration.
     *
     * @param Array $config Optional overrides. When truthy they are merged on top of
     *                      $defaultConfig (overrides win on key clashes); any falsy value
     *                      (null, empty array) leaves the defaults as the active config.
     */
    public function __construct($config = null) {
        $this->config = $config
            ? array_merge($this->defaultConfig, $config)
            : $this->defaultConfig;
    }

    /**
     * Replace the active configuration wholesale.
     * Unlike the constructor, nothing is merged with $defaultConfig here.
     *
     * @param Array $config
     */
    public function setConfig($config) {
        $this->config = $config;
    }

    /**
     * @return Array The configuration currently in effect
     */
    public function getConfig() {
        return $this->config;
    }

    /**
     * Given a string of HTML, return a tidied version of it.
     *
     * @param String $content HTML
     * @return String HTML, tidied
     */
    abstract public function cleanHTML($content);

    /**
     * Experimental factory that picks a default cleaner implementation.
     * HTMLPurifier is preferred when its class exists; otherwise tidy is used.
     *
     * @return PurifierHTMLCleaner|TidyHTMLCleaner|null Null when neither backing class exists
     */
    public static function inst() {
        if (class_exists('HTMLPurifier')) {
            return new PurifierHTMLCleaner();
        }

        if (class_exists('tidy')) {
            return new TidyHTMLCleaner();
        }

        // Neither library is present; callers are expected to test the result.
        return null;
    }
}
/**
* Cleans HTML using the HTMLPurifier package
* http://htmlpurifier.org/
*/
class PurifierHTMLCleaner extends HTMLCleaner {

    /**
     * Purify the markup with HTMLPurifier, then pass the result through SS_HTMLValue
     * and return what its getContent() yields.
     *
     * A new HTMLPurifier instance is created on every call; $this->config is not consulted.
     *
     * @param String $content HTML
     * @return String HTML, tidied
     */
    public function cleanHTML($content) {
        $purifier = new HTMLPurifier();
        $purified = $purifier->purify($content);

        $value = new SS_HTMLValue($purified);

        return $value->getContent();
    }
}
/**
* Cleans HTML using the Tidy package
* http://php.net/manual/en/book.tidy.php
*/
class TidyHTMLCleaner extends HTMLCleaner {

    /**
     * Options handed to tidy when the caller supplies no configuration of its own.
     *
     * @var array
     */
    protected $defaultConfig = array(
        'clean'           => true,
        'output-xhtml'    => true,
        'show-body-only'  => true,
        'wrap'            => 0,
        'doctype'         => 'omit',
        'input-encoding'  => 'utf8',
        'output-encoding' => 'utf8'
    );

    /**
     * Repair the markup with the tidy extension, using the active configuration.
     *
     * @param String $content HTML
     * @return String HTML, tidied (the value returned by tidy::repairString)
     */
    public function cleanHTML($content) {
        $repairer = new tidy();

        return $repairer->repairString($content, $this->config);
    }
}

View File

@ -0,0 +1,28 @@
<?php
/**
* @package sapphire
* @subpackage tests
*/
class HTMLCleanerTest extends SapphireTest {

    /**
     * Checks that whichever cleaner HTMLCleaner::inst() selects repairs two kinds of
     * broken markup: a mismatched closing tag and an unclosed paragraph.
     *
     * The test is skipped when inst() returns nothing, i.e. when neither of the
     * libraries it probes for (HTMLPurifier, tidy) is available.
     */
    public function testHTMLClean() {
        $cleaner = HTMLCleaner::inst();

        if (!$cleaner) {
            $this->markTestSkipped('No HTMLCleaner library available (tidy or HTMLPurifier)');
            return;
        }

        // assertEquals takes the expected value first, then the actual one, so that
        // failure output labels the two sides correctly.
        $this->assertEquals(
            '<p>wrong <b>nesting</b></p>' . "\n",
            $cleaner->cleanHTML('<p>wrong <b>nesting</i></p>' . "\n"),
            "HTML cleaned properly"
        );

        $this->assertEquals(
            '<p>unclosed paragraph</p>' . "\n",
            $cleaner->cleanHTML('<p>unclosed paragraph' . "\n"),
            "HTML cleaned properly"
        );
    }
}