mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #10647 from creative-commoners/pulls/5/dom-crawler
ENH Use masterminds/html5 for HTMLValue
This commit is contained in:
commit
a65d470e93
@ -2,10 +2,6 @@
|
||||
Name: corehtml
|
||||
---
|
||||
SilverStripe\Core\Injector\Injector:
|
||||
SilverStripe\View\Parsers\HTMLValue:
|
||||
class: SilverStripe\View\Parsers\HTML4Value
|
||||
# Shorthand
|
||||
HTMLValue: '%$SilverStripe\View\Parsers\HTMLValue'
|
||||
SilverStripe\Forms\HTMLEditor\HTMLEditorConfig:
|
||||
class: SilverStripe\Forms\HTMLEditor\TinyMCEConfig
|
||||
SilverStripe\Forms\HTMLEditor\TinyMCEScriptGenerator: '%$SilverStripe\Forms\HTMLEditor\TinyMCECombinedGenerator'
|
||||
|
@ -29,6 +29,7 @@
|
||||
"embed/embed": "^4.4.7",
|
||||
"league/csv": "^9.8.0",
|
||||
"m1/env": "^2.2.0",
|
||||
"masterminds/html5": "^2.7",
|
||||
"monolog/monolog": "^3.2.0",
|
||||
"nikic/php-parser": "^4.15.0",
|
||||
"psr/container": "^1.1 || ^2.0",
|
||||
|
@ -82,7 +82,7 @@ class HTML
|
||||
if ($content) {
|
||||
throw new InvalidArgumentException("Void element \"{$tag}\" cannot have content");
|
||||
}
|
||||
return "<{$tag}{$preparedAttributes} />";
|
||||
return "<{$tag}{$preparedAttributes}>";
|
||||
}
|
||||
|
||||
// Closed tag type
|
||||
|
@ -1,31 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace SilverStripe\View\Parsers;
|
||||
|
||||
/**
 * HTMLValue implementation that parses fragments through the document's loadHTML() method.
 */
class HTML4Value extends HTMLValue
{
    /**
     * Replace the current document with the result of parsing the given fragment.
     *
     * The fragment is wrapped in an html/head/body skeleton whose meta tag declares UTF-8
     * before it is handed to loadHTML().
     *
     * @param string $content
     * @return bool The result reported by loadHTML()
     */
    public function setContent($content)
    {
        // Ensure that \r (carriage return) characters don't get replaced with "&#13;" entity by DOMDocument
        // This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
        $content = str_replace(chr(13), '', $content ?? '');

        // Reset the document if we're in an invalid state for some reason
        if (!$this->isValid()) {
            $this->setDocument(null);
        }

        // Collect libxml parse errors internally instead of emitting warnings for malformed markup
        $errorState = libxml_use_internal_errors(true);
        try {
            return $this->getDocument()->loadHTML(
                '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>'
                . "<body>$content</body></html>"
            );
        } finally {
            // Always discard the collected errors and restore the caller's libxml setting,
            // even if loading throws
            libxml_clear_errors();
            libxml_use_internal_errors($errorState);
        }
    }
}
|
@ -4,22 +4,20 @@ namespace SilverStripe\View\Parsers;
|
||||
|
||||
use SilverStripe\Core\Convert;
|
||||
use SilverStripe\View\ViewableData;
|
||||
use Masterminds\HTML5;
|
||||
use DOMNodeList;
|
||||
use DOMXPath;
|
||||
use DOMDocument;
|
||||
use SilverStripe\View\HTML;
|
||||
|
||||
/**
|
||||
* This class handles the converting of HTML fragments between a string and a DOMDocument based
|
||||
* representation.
|
||||
*
|
||||
* Fragments are parsed as HTML5 (via masterminds/html5). Creating instances through the Injector
|
||||
* still allows a project to substitute a subclass with different parsing behaviour.
|
||||
*
|
||||
* @mixin DOMDocument
|
||||
*/
|
||||
abstract class HTMLValue extends ViewableData
|
||||
class HTMLValue extends ViewableData
|
||||
{
|
||||
|
||||
public function __construct($fragment = null)
|
||||
{
|
||||
if ($fragment) {
|
||||
@ -28,7 +26,25 @@ abstract class HTMLValue extends ViewableData
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
abstract public function setContent($fragment);
|
||||
/**
 * Parse an HTML fragment with the HTML5 parser and store the resulting document.
 *
 * Any html, head and body tags in the fragment are removed first, because the fragment is
 * wrapped in its own html/head/body skeleton (declaring UTF-8) before being parsed.
 *
 * @param string $content
 * @return bool True when a document was produced and stored; false otherwise, in which case
 *              this value is flagged as invalid.
 */
public function setContent($content)
{
    // Passing null to preg_replace() is deprecated as of PHP 8.1, so normalise it first
    $content = $content ?? '';

    // Remove wrapper tags only. The negative lookahead stops the pattern from also removing
    // elements whose names merely start with one of these words, e.g. <header> or <body-copy>.
    $stripped = preg_replace('#</?(?:html|head|body)(?![\w:-])[^>]*>#si', '', $content);
    if ($stripped === null) {
        // preg_replace() reports failure with null; parse the unstripped fragment rather than
        // silently replacing it with an empty body
        $stripped = $content;
    }

    // disable_html_ns: per the masterminds/html5 documentation, this prevents the parser from
    // assigning the HTML5 namespace to the DOM document
    $html5 = new HTML5(['disable_html_ns' => true]);
    $document = $html5->loadHTML(
        '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>'
        . "<body>{$stripped}</body></html>"
    );

    if ($document) {
        $this->setDocument($document);
        return true;
    }

    $this->valid = false;
    return false;
}
|
||||
|
||||
/**
|
||||
* @return string
|
||||
|
@ -658,8 +658,8 @@ class ShortcodeParser
|
||||
// use a proper DOM
|
||||
list($content, $tags) = $this->replaceElementTagsWithMarkers($content);
|
||||
|
||||
/** @var HTMLValue $htmlvalue */
|
||||
$htmlvalue = Injector::inst()->create('HTMLValue', $content);
|
||||
/** @var HTMLValue $htmlvalue */
|
||||
$htmlvalue = Injector::inst()->create(HTMLValue::class, $content);
|
||||
|
||||
// Now parse the result into a DOM
|
||||
if (!$htmlvalue->isValid()) {
|
||||
|
@ -14,7 +14,7 @@ class HTMLTest extends SapphireTest
|
||||
'name' => 'description',
|
||||
'content' => 'test tag',
|
||||
]);
|
||||
$this->assertEquals('<meta name="description" content="test tag" />', $tag);
|
||||
$this->assertEquals('<meta name="description" content="test tag">', $tag);
|
||||
}
|
||||
|
||||
public function testEmptyAttributes()
|
||||
@ -27,7 +27,7 @@ class HTMLTest extends SapphireTest
|
||||
'disabled' => false,
|
||||
'readonly' => true,
|
||||
]);
|
||||
$this->assertEquals('<meta value="0" max="3" readonly="1" />', $tag);
|
||||
$this->assertEquals('<meta value="0" max="3" readonly="1">', $tag);
|
||||
}
|
||||
|
||||
public function testNormalTag()
|
||||
@ -52,7 +52,7 @@ class HTMLTest extends SapphireTest
|
||||
'alt' => '',
|
||||
]);
|
||||
|
||||
$this->assertEquals('<img src="example.png" alt="" />', $tag);
|
||||
$this->assertEquals('<img src="example.png" alt="">', $tag);
|
||||
}
|
||||
|
||||
public function testVoidContentError()
|
||||
|
@ -1,98 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace SilverStripe\View\Tests\Parsers;
|
||||
|
||||
use SilverStripe\Dev\SapphireTest;
|
||||
use SilverStripe\View\Parsers\HTML4Value;
|
||||
|
||||
/**
 * Tests for parsing and serialising HTML fragments with HTML4Value.
 */
class HTML4ValueTest extends SapphireTest
{
    /**
     * Malformed fragments should be repaired into the expected markup when read back.
     */
    public function testInvalidHTMLSaving()
    {
        $value = new HTML4Value();

        // Malformed input => expected content after a set/get round trip
        $invalid = [
            '<p>Enclosed Value</p></p>'
                => '<p>Enclosed Value</p>',
            '<meta content="text/html"></meta>'
                => '<meta content="text/html">',
            '<p><div class="example"></div></p>'
                => '<p></p><div class="example"></div>',
            '<html><html><body><falsetag "attribute=""attribute""">'
                => '<falsetag></falsetag>',
            '<body<body<body>/bodu>/body>'
                => '/bodu>/body>'
        ];

        foreach ($invalid as $input => $expected) {
            $value->setContent($input);
            $this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be saved');
        }
    }

    /**
     * Multi-byte UTF-8 characters must survive a set/get round trip unchanged.
     */
    public function testUtf8Saving()
    {
        $value = new HTML4Value();

        $value->setContent('<p>ö ß ā い 家</p>');
        $this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
    }

    /**
     * The href of the first link must still be readable from badly nested or broken markup.
     */
    public function testInvalidHTMLTagNames()
    {
        $value = new HTML4Value();

        $invalid = [
            '<p><div><a href="test-link"></p></div>',
            '<html><div><a href="test-link"></a></a></html_>',
            '""\'\'\'"""\'""<<<>/</<htmlbody><a href="test-link"<<>'
        ];

        foreach ($invalid as $input) {
            $value->setContent($input);
            $this->assertEquals(
                'test-link',
                $value->getElementsByTagName('a')->item(0)->getAttribute('href'),
                'Link data can be extracted from malformed HTML'
            );
        }
    }

    /**
     * Windows-style line endings in the input come back as plain \n.
     */
    public function testMixedNewlines()
    {
        $value = new HTML4Value();

        $value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
        $this->assertEquals(
            "<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
            $value->getContent(),
            'Newlines get converted'
        );
    }

    /**
     * Square brackets in attribute values stay literal, while a double quote stays entity-encoded.
     */
    public function testAttributeEscaping()
    {
        $value = new HTML4Value();

        $value->setContent('<a href="[]"></a>');
        $this->assertEquals('<a href="[]"></a>', $value->getContent(), "'[' character isn't escaped");

        // The quote has to be written as an entity: a bare '"' would terminate the attribute
        // value instead of being part of it
        $value->setContent('<a href="&quot;"></a>');
        $this->assertEquals('<a href="&quot;"></a>', $value->getContent(), "'\"' character is escaped");
    }

    /**
     * getContent() returns the markup for valid values and an empty string once flagged invalid.
     */
    public function testGetContent()
    {
        $value = new HTML4Value();

        $value->setContent('<p>This is valid</p>');
        $this->assertEquals('<p>This is valid</p>', $value->getContent(), "Valid content is returned");

        $value->setContent('<p?< This is not really valid but it will get parsed into something valid');
        // We can sometimes get to this state where HTMLValue->valid is false,
        // for instance if a content editor saves something really weird in a LiteralField.
        // We can manually get to this state via ->setInvalid()
        $value->setInvalid();
        $this->assertEquals('', $value->getContent(), "Blank string is returned when invalid");
    }
}
|
163
tests/php/View/Parsers/HTMLValueTest.php
Normal file
163
tests/php/View/Parsers/HTMLValueTest.php
Normal file
@ -0,0 +1,163 @@
|
||||
<?php
|
||||
|
||||
namespace SilverStripe\View\Tests\Parsers;
|
||||
|
||||
use SilverStripe\Dev\SapphireTest;
|
||||
use SilverStripe\View\Parsers\HTMLValue;
|
||||
use SilverStripe\ORM\FieldType\DBHTMLText;
|
||||
use SilverStripe\View\Parsers\ShortcodeParser;
|
||||
use SilverStripe\Core\Convert;
|
||||
|
||||
/**
 * Tests for parsing and serialising HTML fragments with HTMLValue, including its use
 * through DBHTMLText shortcode rendering.
 */
class HTMLValueTest extends SapphireTest
{
    /**
     * Malformed fragments should be repaired into the expected markup when read back.
     */
    public function testInvalidHTMLParsing()
    {
        $value = new HTMLValue();

        // Malformed input => expected content after a set/get round trip
        $invalid = [
            '<p>Enclosed Value</p><p>a' => '<p>Enclosed Value</p><p>a</p>',
            '<meta content="text/html"></meta>' => '<meta content="text/html">',
            '<p><div class="example"></div><p>' => '<p></p><div class="example"></div><p></p>',
            '<html><html><body><falsetag "attribute=""attribute""">' => '<falsetag></falsetag>',
            '<body<body<body>/bodu>/body>' => '/bodu>/body>'
        ];

        foreach ($invalid as $input => $expected) {
            $value->setContent($input);
            $this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be parsed');
        }
    }

    /**
     * Multi-byte UTF-8 characters must survive a set/get round trip unchanged.
     */
    public function testUtf8Saving()
    {
        $value = new HTMLValue();

        $value->setContent('<p>ö ß ā い 家</p>');
        $this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
    }

    /**
     * Whitespace between sibling elements is preserved.
     */
    public function testWhitespaceHandling()
    {
        $value = new HTMLValue();

        $value->setContent('<p></p> <p></p>');
        $this->assertEquals('<p></p> <p></p>', $value->getContent());
    }

    /**
     * The href of the first link must still be readable from badly nested or broken markup.
     */
    public function testInvalidHTMLTagNames()
    {
        $value = new HTMLValue();

        $invalid = [
            '<p><div><a href="test-link"></p></div>',
            '<html><div><a href="test-link"></a></a></html_>'
        ];

        foreach ($invalid as $input) {
            $value->setContent($input);

            $this->assertEquals(
                'test-link',
                $value->getElementsByTagName('a')->item(0)->getAttribute('href'),
                'Link data can be extracted from malformed HTML'
            );
        }
    }

    /**
     * Windows-style line endings in the input come back as plain \n.
     */
    public function testMixedNewlines()
    {
        $value = new HTMLValue();

        $value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
        $this->assertEquals(
            "<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
            $value->getContent(),
            'Newlines get converted'
        );
    }

    /**
     * Square brackets in attribute values stay literal, while a double quote stays entity-encoded.
     */
    public function testAttributeEscaping()
    {
        $value = new HTMLValue();

        $value->setContent('<a href="[]"></a>');
        $this->assertEquals('<a href="[]"></a>', $value->getContent(), "'[' character isn't escaped");

        // The quote has to be written as an entity: a bare '"' would terminate the attribute
        // value instead of being part of it
        $value->setContent('<a href="&quot;"></a>');
        $this->assertEquals('<a href="&quot;"></a>', $value->getContent(), "'\"' character is escaped");
    }

    /**
     * Shortcode output is merged into the surrounding markup and void elements are normalised.
     */
    public function testShortcodeValue()
    {
        ShortcodeParser::get('default')->register(
            'test_shortcode',
            function () {
                return 'bit of test shortcode output';
            }
        );
        $content = DBHTMLText::create('Test', ['shortcodes' => true])
            ->setValue('<p>Some content with a [test_shortcode] and a <br /> followed by an <hr> in it.</p>')
            ->forTemplate();
        $this->assertStringContainsString(
            // hr is flow content, not phrasing content, so must be corrected to be outside the p tag.
            '<p>Some content with a bit of test shortcode output and a <br> followed by an </p><hr> in it.',
            $content
        );
    }

    /**
     * Ampersands in text and attribute values are entity-encoded when the content is read back.
     */
    public function testEntities()
    {
        $content = '<a href="http://domain.test/path?two&vars">ampersand & test & link</a>';
        $output = new HTMLValue($content);
        $output = $output->getContent();
        $this->assertEquals(
            '<a href="http://domain.test/path?two&amp;vars">ampersand &amp; test &amp; link</a>',
            $output
        );
    }

    /**
     * Attribute-escaped URLs produced by a shortcode handler are not decoded or double-encoded.
     */
    public function testShortcodeEntities()
    {
        ShortcodeParser::get('default')->register(
            'sitetree_link_test',
            // A mildly stubbed copy from SilverStripe\CMS\Model\SiteTree::link_shortcode_handler
            function ($arguments, $content = null, $parser = null) {
                $link = Convert::raw2att('https://google.com/search?q=unit&test');
                if ($content) {
                    $link = sprintf('<a href="%s">%s</a>', $link, $parser->parse($content));
                }
                return $link;
            }
        );
        // Input => expected rendered output; every ampersand is expected back as &amp;
        $content = [
            '[sitetree_link_test,id=2]' => 'https://google.com/search?q=unit&amp;test',
            // the random [ triggers the shortcode parser, which seems to be where problems arise.
            '<a href="https://google.com/search?q=unit&test"> [ non shortcode link</a>' =>
                '<a href="https://google.com/search?q=unit&amp;test"> [ non shortcode link</a>',
            '[sitetree_link_test,id=1]test link[/sitetree_link_test]' =>
                '<a href="https://google.com/search?q=unit&amp;test">test link</a>'
        ];
        foreach ($content as $input => $expected) {
            $output = DBHTMLText::create('Test', ['shortcodes' => true])
                ->setValue($input)
                ->forTemplate();
            $this->assertEquals($expected, $output);
        }
    }

    /**
     * Markup nested inside noscript elements is returned exactly as it was supplied.
     */
    public function testValidHTMLInNoscriptTags()
    {
        $value = new HTMLValue();

        $noscripts = [
            '<noscript><p>Enclosed Value</p></noscript>',
            '<noscript><span class="test">Enclosed Value</span></noscript>',
            '<noscript><img src="/test.jpg" alt="test"></noscript>',
        ];

        foreach ($noscripts as $noscript) {
            $value->setContent($noscript);
            $this->assertEquals($noscript, $value->getContent(), 'Child tags are left untouched in noscript tags.');
        }
    }
}
|
Loading…
Reference in New Issue
Block a user