mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #2182 from hafriedlander/fix/sanitise
FIX HtmlEditorField not re-checking sanitisation server side
This commit is contained in:
commit
3ac22ed638
@ -35,6 +35,7 @@
|
||||
* Optional integration with ImageMagick as a new image manipulation backend
|
||||
* Support for PHP 5.4's built-in webserver
|
||||
* Support for [Composer](http://getcomposer.org) dependency manager (also works with 3.0)
|
||||
* Added support for filtering incoming HTML from TinyMCE (disabled by default, see [security](/topics/security))
|
||||
|
||||
## Upgrading
|
||||
|
||||
|
@ -272,6 +272,15 @@ Some rules of thumb:
|
||||
* Don't concatenate URLs in a template. It only works in extremely simple cases that usually contain bugs.
|
||||
* Use *Controller::join_links()* to concatenate URLs. It deals with query strings and other such edge cases.
|
||||
|
||||
### Filtering incoming HTML from TinyMCE
|
||||
|
||||
In some cases you may be particularly concerned about which HTML elements are addable to Content via the CMS.
|
||||
By default, although TinyMCE is configured to restrict some dangerous tags (such as `script` tags), this restriction
|
||||
is not enforced server-side. A malicious user with write access to the CMS might create a specific request to avoid
|
||||
these restrictions.
|
||||
|
||||
To enable server side filtering using the same whitelisting controls as TinyMCE, set the
|
||||
HtmlEditorField::$sanitise_server_side config property to true.
|
||||
|
||||
## Cross-Site Request Forgery (CSRF)
|
||||
|
||||
|
@ -20,6 +20,12 @@ class HtmlEditorField extends TextareaField {
|
||||
*/
|
||||
private static $insert_width = 600;
|
||||
|
||||
/**
|
||||
* @config
|
||||
* @var bool Should we check the valid_elements (& extended_valid_elements) rules from HtmlEditorConfig server side?
|
||||
*/
|
||||
private static $sanitise_server_side = false;
|
||||
|
||||
protected $rows = 30;
|
||||
|
||||
/**
|
||||
@ -111,7 +117,12 @@ class HtmlEditorField extends TextareaField {
|
||||
$linkedFiles = array();
|
||||
|
||||
$htmlValue = Injector::inst()->create('HTMLValue', $this->value);
|
||||
|
||||
|
||||
if($this->config()->sanitise_server_side) {
|
||||
$santiser = Injector::inst()->create('HtmlEditorSanitiser', HtmlEditorConfig::get_active());
|
||||
$santiser->sanitise($htmlValue);
|
||||
}
|
||||
|
||||
if(class_exists('SiteTree')) {
|
||||
// Populate link tracking for internal links & links to asset files.
|
||||
if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) {
|
||||
|
304
forms/HtmlEditorSanitiser.php
Normal file
304
forms/HtmlEditorSanitiser.php
Normal file
@ -0,0 +1,304 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Sanitises an HTMLValue so it's contents are the elements and attributes that are whitelisted
|
||||
* using the same configuration as TinyMCE
|
||||
*
|
||||
* See www.tinymce.com/wiki.php/configuration:valid_elements for details on the spec of TinyMCE's
|
||||
* whitelist configuration
|
||||
*
|
||||
* Class HtmlEditorSanitiser
|
||||
*/
|
||||
class HtmlEditorSanitiser {
|
||||
|
||||
/** @var [stdClass] - $element => $rule hash for whitelist element rules where the element name isn't a pattern */
|
||||
protected $elements = array();
|
||||
/** @var [stdClass] - Sequential list of whitelist element rules where the element name is a pattern */
|
||||
protected $elementPatterns = array();
|
||||
|
||||
/** @var [stdClass] - The list of attributes that apply to all further whitelisted elements added */
|
||||
protected $globalAttributes = array();
|
||||
|
||||
/**
|
||||
* Construct a sanitiser from a given HtmlEditorConfig
|
||||
*
|
||||
* Note that we build data structures from the current state of HtmlEditorConfig - later changes to
|
||||
* the passed instance won't cause this instance to update it's whitelist
|
||||
*
|
||||
* @param HtmlEditorConfig $config
|
||||
*/
|
||||
public function __construct(HtmlEditorConfig $config) {
|
||||
$valid = $config->getOption('valid_elements');
|
||||
if ($valid) $this->addValidElements($valid);
|
||||
|
||||
$valid = $config->getOption('extended_valid_elements');
|
||||
if ($valid) $this->addValidElements($valid);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a TinyMCE pattern (close to unix glob style), create a regex that does the match
|
||||
*
|
||||
* @param $str - The TinyMCE pattern
|
||||
* @return string - The equivalent regex
|
||||
*/
|
||||
protected function patternToRegex($str) {
|
||||
return '/^' . preg_replace('/([?+*])/', '.$1', $str) . '$/';
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a valid_elements string, parse out the actual element and attribute rules and add to the
|
||||
* internal whitelist
|
||||
*
|
||||
* Logic based heavily on javascript version from tiny_mce_src.js
|
||||
*
|
||||
* @param string $validElements - The valid_elements or extended_valid_elements string to add to the whitelist
|
||||
*/
|
||||
protected function addValidElements($validElements) {
|
||||
$elementRuleRegExp = '/^([#+\-])?([^\[\/]+)(?:\/([^\[]+))?(?:\[([^\]]+)\])?$/';
|
||||
$attrRuleRegExp = '/^([!\-])?(\w+::\w+|[^=:<]+)?(?:([=:<])(.*))?$/';
|
||||
$hasPatternsRegExp = '/[*?+]/';
|
||||
|
||||
foreach(explode(',', $validElements) as $validElement) {
|
||||
if(preg_match($elementRuleRegExp, $validElement, $matches)) {
|
||||
|
||||
$prefix = @$matches[1];
|
||||
$elementName = @$matches[2];
|
||||
$outputName = @$matches[3];
|
||||
$attrData = @$matches[4];
|
||||
|
||||
// Create the new element
|
||||
$element = new stdClass();
|
||||
$element->attributes = array();
|
||||
$element->attributePatterns = array();
|
||||
|
||||
$element->attributesRequired = array();
|
||||
$element->attributesDefault = array();
|
||||
$element->attributesForced = array();
|
||||
|
||||
foreach(array('#' => 'paddEmpty', '-' => 'removeEmpty') as $match => $means) {
|
||||
$element->$means = ($prefix === $match);
|
||||
}
|
||||
|
||||
// Copy attributes from global rule into current rule
|
||||
if($this->globalAttributes) {
|
||||
$element->attributes = array_merge($element->attributes, $this->globalAttributes);
|
||||
}
|
||||
|
||||
// Attributes defined
|
||||
if($attrData) {
|
||||
foreach(explode('|', $attrData) as $attr) {
|
||||
if(preg_match($attrRuleRegExp, $attr, $matches)) {
|
||||
$attr = new stdClass();
|
||||
|
||||
$attrType = @$matches[1];
|
||||
$attrName = str_replace('::', ':', @$matches[2]);
|
||||
$prefix = @$matches[3];
|
||||
$value = @$matches[4];
|
||||
|
||||
// Required
|
||||
if($attrType === '!') {
|
||||
$element->attributesRequired[] = $attrName;
|
||||
$attr->required = true;
|
||||
}
|
||||
|
||||
// Denied from global
|
||||
else if($attrType === '-') {
|
||||
unset($element->attributes[$attrName]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Default value
|
||||
if($prefix) {
|
||||
// Default value
|
||||
if($prefix === '=') {
|
||||
$element->attributesDefault[$attrName] = $value;
|
||||
$attr->defaultValue = $value;
|
||||
}
|
||||
|
||||
// Forced value
|
||||
else if($prefix === ':') {
|
||||
$element->attributesForced[$attrName] = $value;
|
||||
$attr->forcedValue = $value;
|
||||
}
|
||||
|
||||
// Required values
|
||||
else if($prefix === '<') {
|
||||
$attr->validValues = explode('?', $value);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for attribute patterns
|
||||
if(preg_match($hasPatternsRegExp, $attrName)) {
|
||||
$attr->pattern = $this->patternToRegex($attrName);
|
||||
$element->attributePatterns[] = $attr;
|
||||
}
|
||||
else {
|
||||
$element->attributes[$attrName] = $attr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Global rule, store away these for later usage
|
||||
if(!$this->globalAttributes && $elementName == '@') {
|
||||
$this->globalAttributes = $element->attributes;
|
||||
}
|
||||
|
||||
// Handle substitute elements such as b/strong
|
||||
if($outputName) {
|
||||
$element->outputName = $elementName;
|
||||
$this->elements[$outputName] = $element;
|
||||
}
|
||||
|
||||
// Add pattern or exact element
|
||||
if(preg_match($hasPatternsRegExp, $elementName)) {
|
||||
$element->pattern = $this->patternToRegex($elementName);
|
||||
$this->elementPatterns[] = $element;
|
||||
}
|
||||
else {
|
||||
$this->elements[$elementName] = $element;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an element tag, return the rule structure for that element
|
||||
* @param string $tag - The element tag
|
||||
* @return stdClass - The element rule
|
||||
*/
|
||||
protected function getRuleForElement($tag) {
|
||||
if(isset($this->elements[$tag])) {
|
||||
return $this->elements[$tag];
|
||||
}
|
||||
else foreach($this->elementPatterns as $element) {
|
||||
if(preg_match($element->pattern, $tag)) return $element;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an attribute name, return the rule structure for that attribute
|
||||
* @param string $name - The attribute name
|
||||
* @return stdClass - The attribute rule
|
||||
*/
|
||||
protected function getRuleForAttribute($elementRule, $name) {
|
||||
if(isset($elementRule->attributes[$name])) {
|
||||
return $elementRule->attributes[$name];
|
||||
}
|
||||
else foreach($elementRule->attributePatterns as $attribute) {
|
||||
if(preg_match($attribute->pattern, $name)) return $attribute;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a DOMElement and an element rule, check if that element passes the rule
|
||||
* @param DOMElement $element - the element to check
|
||||
* @param stdClass $rule - the rule to check against
|
||||
* @return bool - true if the element passes (and so can be kept), false if it fails (and so needs stripping)
|
||||
*/
|
||||
protected function elementMatchesRule($element, $rule = null) {
|
||||
// If the rule doesn't exist at all, the element isn't allowed
|
||||
if(!$rule) return false;
|
||||
|
||||
// If the rule has attributes required, check them to see if this element has at least one
|
||||
if($rule->attributesRequired) {
|
||||
$hasMatch = false;
|
||||
|
||||
foreach($rule->attributesRequired as $attr) {
|
||||
if($element->getAttribute($attr)) {
|
||||
$hasMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!$hasMatch) return false;
|
||||
}
|
||||
|
||||
// If the rule says to remove empty elements, and this element is empty, remove it
|
||||
if($rule->removeEmpty && !$element->firstChild) return false;
|
||||
|
||||
// No further tests required, element passes
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a DOMAttr and an attribute rule, check if that attribute passes the rule
|
||||
* @param DOMAttr $attr - the attribute to check
|
||||
* @param stdClass $rule - the rule to check against
|
||||
* @return bool - true if the attribute passes (and so can be kept), false if it fails (and so needs stripping)
|
||||
*/
|
||||
protected function attributeMatchesRule($attr, $rule = null) {
|
||||
// If the rule doesn't exist at all, the attribute isn't allowed
|
||||
if(!$rule) return false;
|
||||
|
||||
// If the rule has a set of valid values, check them to see if this attribute is one
|
||||
if(isset($rule->validValues) && !in_array($attr->value, $rule->validValues)) return false;
|
||||
|
||||
// No further tests required, attribute passes
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an SS_HTMLValue instance, will remove and elements and attributes that are
|
||||
* not explicitly included in the whitelist passed to __construct on instance creation
|
||||
*
|
||||
* @param SS_HTMLValue $html - The HTMLValue to remove any non-whitelisted elements & attributes from
|
||||
*/
|
||||
public function sanitise (SS_HTMLValue $html) {
|
||||
if(!$this->elements && !$this->elementPatterns) return;
|
||||
|
||||
$doc = $html->getDocument();
|
||||
|
||||
foreach($html->query('//body//*') as $el) {
|
||||
$elementRule = $this->getRuleForElement($el->tagName);
|
||||
|
||||
// If this element isn't allowed, strip it
|
||||
if(!$this->elementMatchesRule($el, $elementRule)) {
|
||||
// If it's a script or style, we don't keep contents
|
||||
if($el->tagName === 'script' || $el->tagName === 'style') {
|
||||
$el->parentNode->removeChild($el);
|
||||
}
|
||||
// Otherwise we replace this node with all it's children
|
||||
else {
|
||||
// First, create a new fragment with all of $el's children moved into it
|
||||
$frag = $doc->createDocumentFragment();
|
||||
while($el->firstChild) $frag->appendChild($el->firstChild);
|
||||
|
||||
// Then replace $el with the frags contents (which used to be it's children)
|
||||
$el->parentNode->replaceChild($frag, $el);
|
||||
}
|
||||
}
|
||||
// Otherwise tidy the element
|
||||
else {
|
||||
// First, if we're supposed to pad & this element is empty, fix that
|
||||
if($elementRule->paddEmpty && !$el->firstChild) {
|
||||
$el->nodeValue = ' ';
|
||||
}
|
||||
|
||||
// Then filter out any non-whitelisted attributes
|
||||
$children = $el->attributes;
|
||||
$i = $children->length;
|
||||
while($i--) {
|
||||
$attr = $children->item($i);
|
||||
$attributeRule = $this->getRuleForAttribute($elementRule, $attr->name);
|
||||
|
||||
// If this attribute isn't allowed, strip it
|
||||
if(!$this->attributeMatchesRule($attr, $attributeRule)) {
|
||||
$el->removeAttributeNode($attr);
|
||||
}
|
||||
}
|
||||
|
||||
// Then enforce any default attributes
|
||||
foreach($elementRule->attributesDefault as $attr => $default) {
|
||||
if(!$el->getAttribute($attr)) $el->setAttribute($attr, $default);
|
||||
}
|
||||
|
||||
// And any forced attributes
|
||||
foreach($elementRule->attributesForced as $attr => $forced) {
|
||||
$el->setAttribute($attr, $forced);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
57
tests/forms/HtmlEditorSanitiserTest.php
Normal file
57
tests/forms/HtmlEditorSanitiserTest.php
Normal file
@ -0,0 +1,57 @@
|
||||
<?php
|
||||
/**
|
||||
* @package framework
|
||||
* @subpackage tests
|
||||
*/
|
||||
class HtmlEditorSanitiserTest extends FunctionalTest {
|
||||
|
||||
public function testSanitisation() {
|
||||
$tests = array(
|
||||
array(
|
||||
'p,strong',
|
||||
'<p>Leave Alone</p><div>Strip parent<strong>But keep children</strong> in order</div>',
|
||||
'<p>Leave Alone</p>Strip parent<strong>But keep children</strong> in order',
|
||||
'Non-whitelisted elements are stripped, but children are kept'
|
||||
),
|
||||
array(
|
||||
'p,strong',
|
||||
'<div>A <strong>B <div>Nested elements are still filtered</div> C</strong> D</div>',
|
||||
'A <strong>B Nested elements are still filtered C</strong> D',
|
||||
'Non-whitelisted elements are stripped even when children of non-whitelisted elements'
|
||||
),
|
||||
array(
|
||||
'p',
|
||||
'<p>Keep</p><script>Strip <strong>including children</strong></script>',
|
||||
'<p>Keep</p>',
|
||||
'Non-whitelisted script elements are totally stripped, including any children'
|
||||
),
|
||||
array(
|
||||
'p[id]',
|
||||
'<p id="keep" bad="strip">Test</p>',
|
||||
'<p id="keep">Test</p>',
|
||||
'Non-whitelisted attributes are stripped'
|
||||
),
|
||||
array(
|
||||
'p[default1=default1|default2=default2|force1:force1|force2:force2]',
|
||||
'<p default1="specific1" force1="specific1">Test</p>',
|
||||
'<p default1="specific1" force1="force1" default2="default2" force2="force2">Test</p>',
|
||||
'Default attributes are set when not present in input, forced attributes are always set'
|
||||
)
|
||||
);
|
||||
|
||||
$config = HtmlEditorConfig::get('htmleditorsanitisertest');
|
||||
|
||||
foreach($tests as $test) {
|
||||
list($validElements, $input, $output, $desc) = $test;
|
||||
|
||||
$config->setOptions(array('valid_elements' => $validElements));
|
||||
$sanitiser = new HtmlEditorSanitiser($config);
|
||||
|
||||
$htmlValue = Injector::inst()->create('HTMLValue', $input);
|
||||
$sanitiser->sanitise($htmlValue);
|
||||
|
||||
$this->assertEquals($output, $htmlValue->getContent(), $desc);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user