silverstripe-framework/core/HTMLCleaner.php

101 lines
2.1 KiB
PHP
Raw Normal View History

<?php
/**
* @package framework
* @subpackage misc
*/
/**
* Base class for HTML cleaning implementations.
*/
abstract class HTMLCleaner extends Object {
/**
* @var array
*/
protected $defaultConfig = array();
2014-08-15 18:53:05 +12:00
/**
* @var $config Array configuration variables for HTMLCleaners that support configuration (like Tidy)
*/
public $config;
/**
* @param Array The configuration for the cleaner, if necessary
*/
public function __construct($config = null) {
if ($config) $this->config = array_merge($this->defaultConfig, $config);
else $this->config = $this->defaultConfig;
}
2014-08-15 18:53:05 +12:00
/**
* @param Array
*/
public function setConfig($config) {
$this->config = $config;
}
2014-08-15 18:53:05 +12:00
/**
* @return Array
*/
public function getConfig() {
return $this->config;
}
/**
* Passed a string, return HTML that has been tidied.
2014-08-15 18:53:05 +12:00
*
* @param String HTML
* @return String HTML, tidied
*/
abstract public function cleanHTML($content);
/**
* Experimental inst class to create a default html cleaner class
2014-08-15 18:53:05 +12:00
*
* @return PurifierHTMLCleaner|TidyHTMLCleaner
*/
public static function inst() {
if (class_exists('HTMLPurifier')) return new PurifierHTMLCleaner();
elseif (class_exists('tidy')) return new TidyHTMLCleaner();
}
}
/**
* Cleans HTML using the HTMLPurifier package
* http://htmlpurifier.org/
*/
class PurifierHTMLCleaner extends HTMLCleaner {
2014-08-15 18:53:05 +12:00
public function cleanHTML($content) {
$html = new HTMLPurifier();
$doc = Injector::inst()->create('HTMLValue', $html->purify($content));
return $doc->getContent();
}
}
/**
* Cleans HTML using the Tidy package
* http://php.net/manual/en/book.tidy.php
*/
class TidyHTMLCleaner extends HTMLCleaner {
protected $defaultConfig = array(
'clean' => true,
'output-xhtml' => true,
'show-body-only' => true,
'wrap' => 0,
'doctype' => 'omit',
'input-encoding' => 'utf8',
'output-encoding' => 'utf8'
);
public function cleanHTML($content) {
$tidy = new tidy();
$output = $tidy->repairString($content, $this->config);
2014-08-15 18:53:05 +12:00
// Clean leading/trailing whitespace
return preg_replace('/(^\s+)|(\s+$)/', '', $output);
}
}