mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 12:05:37 +00:00
Merge pull request #10647 from creative-commoners/pulls/5/dom-crawler
ENH Use masterminds/html5 for HTMLValue
This commit is contained in:
commit
a65d470e93
@ -2,10 +2,6 @@
|
|||||||
Name: corehtml
|
Name: corehtml
|
||||||
---
|
---
|
||||||
SilverStripe\Core\Injector\Injector:
|
SilverStripe\Core\Injector\Injector:
|
||||||
SilverStripe\View\Parsers\HTMLValue:
|
|
||||||
class: SilverStripe\View\Parsers\HTML4Value
|
|
||||||
# Shorthand
|
|
||||||
HTMLValue: '%$SilverStripe\View\Parsers\HTMLValue'
|
|
||||||
SilverStripe\Forms\HTMLEditor\HTMLEditorConfig:
|
SilverStripe\Forms\HTMLEditor\HTMLEditorConfig:
|
||||||
class: SilverStripe\Forms\HTMLEditor\TinyMCEConfig
|
class: SilverStripe\Forms\HTMLEditor\TinyMCEConfig
|
||||||
SilverStripe\Forms\HTMLEditor\TinyMCEScriptGenerator: '%$SilverStripe\Forms\HTMLEditor\TinyMCECombinedGenerator'
|
SilverStripe\Forms\HTMLEditor\TinyMCEScriptGenerator: '%$SilverStripe\Forms\HTMLEditor\TinyMCECombinedGenerator'
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
"embed/embed": "^4.4.7",
|
"embed/embed": "^4.4.7",
|
||||||
"league/csv": "^9.8.0",
|
"league/csv": "^9.8.0",
|
||||||
"m1/env": "^2.2.0",
|
"m1/env": "^2.2.0",
|
||||||
|
"masterminds/html5": "^2.7",
|
||||||
"monolog/monolog": "^3.2.0",
|
"monolog/monolog": "^3.2.0",
|
||||||
"nikic/php-parser": "^4.15.0",
|
"nikic/php-parser": "^4.15.0",
|
||||||
"psr/container": "^1.1 || ^2.0",
|
"psr/container": "^1.1 || ^2.0",
|
||||||
|
@ -82,7 +82,7 @@ class HTML
|
|||||||
if ($content) {
|
if ($content) {
|
||||||
throw new InvalidArgumentException("Void element \"{$tag}\" cannot have content");
|
throw new InvalidArgumentException("Void element \"{$tag}\" cannot have content");
|
||||||
}
|
}
|
||||||
return "<{$tag}{$preparedAttributes} />";
|
return "<{$tag}{$preparedAttributes}>";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Closed tag type
|
// Closed tag type
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace SilverStripe\View\Parsers;
|
|
||||||
|
|
||||||
class HTML4Value extends HTMLValue
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param string $content
|
|
||||||
* @return bool
|
|
||||||
*/
|
|
||||||
public function setContent($content)
|
|
||||||
{
|
|
||||||
// Ensure that \r (carriage return) characters don't get replaced with "&#13;" entity by DOMDocument
|
|
||||||
// This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
|
|
||||||
$content = str_replace(chr(13), '', $content ?? '');
|
|
||||||
|
|
||||||
// Reset the document if we're in an invalid state for some reason
|
|
||||||
if (!$this->isValid()) {
|
|
||||||
$this->setDocument(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
$errorState = libxml_use_internal_errors(true);
|
|
||||||
$result = $this->getDocument()->loadHTML(
|
|
||||||
'<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' . "<body>$content</body></html>"
|
|
||||||
);
|
|
||||||
libxml_clear_errors();
|
|
||||||
libxml_use_internal_errors($errorState);
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
}
|
|
@ -4,22 +4,20 @@ namespace SilverStripe\View\Parsers;
|
|||||||
|
|
||||||
use SilverStripe\Core\Convert;
|
use SilverStripe\Core\Convert;
|
||||||
use SilverStripe\View\ViewableData;
|
use SilverStripe\View\ViewableData;
|
||||||
|
use Masterminds\HTML5;
|
||||||
use DOMNodeList;
|
use DOMNodeList;
|
||||||
use DOMXPath;
|
use DOMXPath;
|
||||||
use DOMDocument;
|
use DOMDocument;
|
||||||
|
use SilverStripe\View\HTML;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class handles the converting of HTML fragments between a string and a DOMDocument based
|
* This class handles the converting of HTML fragments between a string and a DOMDocument based
|
||||||
* representation.
|
* representation.
|
||||||
*
|
*
|
||||||
* It's designed to allow dependency injection to replace the standard HTML4 version with one that
|
|
||||||
* handles XHTML or HTML5 instead
|
|
||||||
*
|
|
||||||
* @mixin DOMDocument
|
* @mixin DOMDocument
|
||||||
*/
|
*/
|
||||||
abstract class HTMLValue extends ViewableData
|
class HTMLValue extends ViewableData
|
||||||
{
|
{
|
||||||
|
|
||||||
public function __construct($fragment = null)
|
public function __construct($fragment = null)
|
||||||
{
|
{
|
||||||
if ($fragment) {
|
if ($fragment) {
|
||||||
@ -28,7 +26,25 @@ abstract class HTMLValue extends ViewableData
|
|||||||
parent::__construct();
|
parent::__construct();
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract public function setContent($fragment);
|
/**
|
||||||
|
* @param string $content
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
public function setContent($content)
|
||||||
|
{
|
||||||
|
$content = preg_replace('#</?(html|head|body)[^>]*>#si', '', $content);
|
||||||
|
$html5 = new HTML5(['disable_html_ns' => true]);
|
||||||
|
$document = $html5->loadHTML(
|
||||||
|
'<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
|
||||||
|
"<body>$content</body></html>"
|
||||||
|
);
|
||||||
|
if ($document) {
|
||||||
|
$this->setDocument($document);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
$this->valid = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string
|
* @return string
|
||||||
|
@ -659,7 +659,7 @@ class ShortcodeParser
|
|||||||
list($content, $tags) = $this->replaceElementTagsWithMarkers($content);
|
list($content, $tags) = $this->replaceElementTagsWithMarkers($content);
|
||||||
|
|
||||||
/** @var HTMLValue $htmlvalue */
|
/** @var HTMLValue $htmlvalue */
|
||||||
$htmlvalue = Injector::inst()->create('HTMLValue', $content);
|
$htmlvalue = Injector::inst()->create(HTMLValue::class, $content);
|
||||||
|
|
||||||
// Now parse the result into a DOM
|
// Now parse the result into a DOM
|
||||||
if (!$htmlvalue->isValid()) {
|
if (!$htmlvalue->isValid()) {
|
||||||
|
@ -14,7 +14,7 @@ class HTMLTest extends SapphireTest
|
|||||||
'name' => 'description',
|
'name' => 'description',
|
||||||
'content' => 'test tag',
|
'content' => 'test tag',
|
||||||
]);
|
]);
|
||||||
$this->assertEquals('<meta name="description" content="test tag" />', $tag);
|
$this->assertEquals('<meta name="description" content="test tag">', $tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testEmptyAttributes()
|
public function testEmptyAttributes()
|
||||||
@ -27,7 +27,7 @@ class HTMLTest extends SapphireTest
|
|||||||
'disabled' => false,
|
'disabled' => false,
|
||||||
'readonly' => true,
|
'readonly' => true,
|
||||||
]);
|
]);
|
||||||
$this->assertEquals('<meta value="0" max="3" readonly="1" />', $tag);
|
$this->assertEquals('<meta value="0" max="3" readonly="1">', $tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testNormalTag()
|
public function testNormalTag()
|
||||||
@ -52,7 +52,7 @@ class HTMLTest extends SapphireTest
|
|||||||
'alt' => '',
|
'alt' => '',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$this->assertEquals('<img src="example.png" alt="" />', $tag);
|
$this->assertEquals('<img src="example.png" alt="">', $tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testVoidContentError()
|
public function testVoidContentError()
|
||||||
|
@ -1,98 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace SilverStripe\View\Tests\Parsers;
|
|
||||||
|
|
||||||
use SilverStripe\Dev\SapphireTest;
|
|
||||||
use SilverStripe\View\Parsers\HTML4Value;
|
|
||||||
|
|
||||||
class HTML4ValueTest extends SapphireTest
|
|
||||||
{
|
|
||||||
public function testInvalidHTMLSaving()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$invalid = [
|
|
||||||
'<p>Enclosed Value</p></p>'
|
|
||||||
=> '<p>Enclosed Value</p>',
|
|
||||||
'<meta content="text/html"></meta>'
|
|
||||||
=> '<meta content="text/html">',
|
|
||||||
'<p><div class="example"></div></p>'
|
|
||||||
=> '<p></p><div class="example"></div>',
|
|
||||||
'<html><html><body><falsetag "attribute=""attribute""">'
|
|
||||||
=> '<falsetag></falsetag>',
|
|
||||||
'<body<body<body>/bodu>/body>'
|
|
||||||
=> '/bodu>/body>'
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($invalid as $input => $expected) {
|
|
||||||
$value->setContent($input);
|
|
||||||
$this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be saved');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public function testUtf8Saving()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$value->setContent('<p>ö ß ā い 家</p>');
|
|
||||||
$this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
|
|
||||||
}
|
|
||||||
|
|
||||||
public function testInvalidHTMLTagNames()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$invalid = [
|
|
||||||
'<p><div><a href="test-link"></p></div>',
|
|
||||||
'<html><div><a href="test-link"></a></a></html_>',
|
|
||||||
'""\'\'\'"""\'""<<<>/</<htmlbody><a href="test-link"<<>'
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($invalid as $input) {
|
|
||||||
$value->setContent($input);
|
|
||||||
$this->assertEquals(
|
|
||||||
'test-link',
|
|
||||||
$value->getElementsByTagName('a')->item(0)->getAttribute('href'),
|
|
||||||
'Link data can be extracted from malformed HTML'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public function testMixedNewlines()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
|
|
||||||
$this->assertEquals(
|
|
||||||
"<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
|
|
||||||
$value->getContent(),
|
|
||||||
'Newlines get converted'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function testAttributeEscaping()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$value->setContent('<a href="[]"></a>');
|
|
||||||
$this->assertEquals('<a href="[]"></a>', $value->getContent(), "'[' character isn't escaped");
|
|
||||||
|
|
||||||
$value->setContent('<a href="&quot;"></a>');
|
|
||||||
$this->assertEquals('<a href="&quot;"></a>', $value->getContent(), "'\"' character is escaped");
|
|
||||||
}
|
|
||||||
|
|
||||||
public function testGetContent()
|
|
||||||
{
|
|
||||||
$value = new HTML4Value();
|
|
||||||
|
|
||||||
$value->setContent('<p>This is valid</p>');
|
|
||||||
$this->assertEquals('<p>This is valid</p>', $value->getContent(), "Valid content is returned");
|
|
||||||
|
|
||||||
$value->setContent('<p?< This is not really valid but it will get parsed into something valid');
|
|
||||||
// can sometimes get a this state where HTMLValue->valid is false
|
|
||||||
// for instance if a content editor saves something really weird in a LiteralField
|
|
||||||
// we can manually get to this state via ->setInvalid()
|
|
||||||
$value->setInvalid();
|
|
||||||
$this->assertEquals('', $value->getContent(), "Blank string is returned when invalid");
|
|
||||||
}
|
|
||||||
}
|
|
163
tests/php/View/Parsers/HTMLValueTest.php
Normal file
163
tests/php/View/Parsers/HTMLValueTest.php
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace SilverStripe\View\Tests\Parsers;
|
||||||
|
|
||||||
|
use SilverStripe\Dev\SapphireTest;
|
||||||
|
use SilverStripe\View\Parsers\HTMLValue;
|
||||||
|
use SilverStripe\ORM\FieldType\DBHTMLText;
|
||||||
|
use SilverStripe\View\Parsers\ShortcodeParser;
|
||||||
|
use SilverStripe\Core\Convert;
|
||||||
|
|
||||||
|
class HTMLValueTest extends SapphireTest
|
||||||
|
{
|
||||||
|
public function testInvalidHTMLParsing()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$invalid = [
|
||||||
|
'<p>Enclosed Value</p><p>a' => '<p>Enclosed Value</p><p>a</p>',
|
||||||
|
'<meta content="text/html"></meta>' => '<meta content="text/html">',
|
||||||
|
'<p><div class="example"></div><p>' => '<p></p><div class="example"></div><p></p>',
|
||||||
|
'<html><html><body><falsetag "attribute=""attribute""">' => '<falsetag></falsetag>',
|
||||||
|
'<body<body<body>/bodu>/body>' => '/bodu>/body>'
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ($invalid as $input => $expected) {
|
||||||
|
$value->setContent($input);
|
||||||
|
$this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be parsed');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testUtf8Saving()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$value->setContent('<p>ö ß ā い 家</p>');
|
||||||
|
$this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testWhitespaceHandling()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$value->setContent('<p></p> <p></p>');
|
||||||
|
$this->assertEquals('<p></p> <p></p>', $value->getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testInvalidHTMLTagNames()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$invalid = [
|
||||||
|
'<p><div><a href="test-link"></p></div>',
|
||||||
|
'<html><div><a href="test-link"></a></a></html_>'
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ($invalid as $input) {
|
||||||
|
$value->setContent($input);
|
||||||
|
|
||||||
|
$this->assertEquals(
|
||||||
|
'test-link',
|
||||||
|
$value->getElementsByTagName('a')->item(0)->getAttribute('href'),
|
||||||
|
'Link data can be extracted from malformed HTML'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testMixedNewlines()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
|
||||||
|
$this->assertEquals(
|
||||||
|
"<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
|
||||||
|
$value->getContent(),
|
||||||
|
'Newlines get converted'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testAttributeEscaping()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$value->setContent('<a href="[]"></a>');
|
||||||
|
$this->assertEquals('<a href="[]"></a>', $value->getContent(), "'[' character isn't escaped");
|
||||||
|
|
||||||
|
$value->setContent('<a href="&quot;"></a>');
|
||||||
|
$this->assertEquals('<a href="&quot;"></a>', $value->getContent(), "'\"' character is escaped");
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testShortcodeValue()
|
||||||
|
{
|
||||||
|
ShortcodeParser::get('default')->register(
|
||||||
|
'test_shortcode',
|
||||||
|
function () {
|
||||||
|
return 'bit of test shortcode output';
|
||||||
|
}
|
||||||
|
);
|
||||||
|
$content = DBHTMLText::create('Test', ['shortcodes' => true])
|
||||||
|
->setValue('<p>Some content with a [test_shortcode] and a <br /> followed by an <hr> in it.</p>')
|
||||||
|
->forTemplate();
|
||||||
|
$this->assertStringContainsString(
|
||||||
|
// hr is flow content, not phrasing content, so must be corrected to be outside the p tag.
|
||||||
|
'<p>Some content with a bit of test shortcode output and a <br> followed by an </p><hr> in it.',
|
||||||
|
$content
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testEntities()
|
||||||
|
{
|
||||||
|
$content = '<a href="http://domain.test/path?two&vars">ampersand & test & link</a>';
|
||||||
|
$output = new HTMLValue($content);
|
||||||
|
$output = $output->getContent();
|
||||||
|
$this->assertEquals(
|
||||||
|
'<a href="http://domain.test/path?two&amp;vars">ampersand &amp; test &amp; link</a>',
|
||||||
|
$output
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testShortcodeEntities()
|
||||||
|
{
|
||||||
|
ShortcodeParser::get('default')->register(
|
||||||
|
'sitetree_link_test',
|
||||||
|
// A mildly stubbed copy from SilverStripe\CMS\Model\SiteTree::link_shortcode_handler
|
||||||
|
function ($arguments, $content = null, $parser = null) {
|
||||||
|
$link = Convert::raw2att('https://google.com/search?q=unit&test');
|
||||||
|
if ($content) {
|
||||||
|
$link = sprintf('<a href="%s">%s</a>', $link, $parser->parse($content));
|
||||||
|
}
|
||||||
|
return $link;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
$content = [
|
||||||
|
'[sitetree_link_test,id=2]' => 'https://google.com/search?q=unit&amp;test',
|
||||||
|
// the random [ triggers the shortcode parser, which seems to be where problems arise.
|
||||||
|
'<a href="https://google.com/search?q=unit&test"> [ non shortcode link</a>' =>
|
||||||
|
'<a href="https://google.com/search?q=unit&amp;test"> [ non shortcode link</a>',
|
||||||
|
'[sitetree_link_test,id=1]test link[/sitetree_link_test]' =>
|
||||||
|
'<a href="https://google.com/search?q=unit&amp;test">test link</a>'
|
||||||
|
];
|
||||||
|
foreach ($content as $input => $expected) {
|
||||||
|
$output = DBHTMLText::create('Test', ['shortcodes' => true])
|
||||||
|
->setValue($input)
|
||||||
|
->forTemplate();
|
||||||
|
$this->assertEquals($expected, $output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testValidHTMLInNoscriptTags()
|
||||||
|
{
|
||||||
|
$value = new HTMLValue();
|
||||||
|
|
||||||
|
$noscripts = [
|
||||||
|
'<noscript><p>Enclosed Value</p></noscript>',
|
||||||
|
'<noscript><span class="test">Enclosed Value</span></noscript>',
|
||||||
|
'<noscript><img src="/test.jpg" alt="test"></noscript>',
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ($noscripts as $noscript) {
|
||||||
|
$value->setContent($noscript);
|
||||||
|
$this->assertEquals($noscript, $value->getContent(), 'Child tags are left untouched in noscript tags.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user