diff --git a/_config/html.yml b/_config/html.yml index 83b30d3db..1beef739c 100644 --- a/_config/html.yml +++ b/_config/html.yml @@ -2,10 +2,6 @@ Name: corehtml --- SilverStripe\Core\Injector\Injector: - SilverStripe\View\Parsers\HTMLValue: - class: SilverStripe\View\Parsers\HTML4Value - # Shorthand - HTMLValue: '%$SilverStripe\View\Parsers\HTMLValue' SilverStripe\Forms\HTMLEditor\HTMLEditorConfig: class: SilverStripe\Forms\HTMLEditor\TinyMCEConfig SilverStripe\Forms\HTMLEditor\TinyMCEScriptGenerator: '%$SilverStripe\Forms\HTMLEditor\TinyMCECombinedGenerator' diff --git a/composer.json b/composer.json index 04433a977..5fec802cf 100644 --- a/composer.json +++ b/composer.json @@ -29,6 +29,7 @@ "embed/embed": "^4.4.7", "league/csv": "^9.8.0", "m1/env": "^2.2.0", + "masterminds/html5": "^2.7", "monolog/monolog": "^3.2.0", "nikic/php-parser": "^4.15.0", "psr/container": "^2.0", diff --git a/src/View/HTML.php b/src/View/HTML.php index 703a90ed0..cd42f219d 100644 --- a/src/View/HTML.php +++ b/src/View/HTML.php @@ -82,7 +82,7 @@ class HTML if ($content) { throw new InvalidArgumentException("Void element \"{$tag}\" cannot have content"); } - return "<{$tag}{$preparedAttributes} />"; + return "<{$tag}{$preparedAttributes}>"; } // Closed tag type diff --git a/src/View/Parsers/HTML4Value.php b/src/View/Parsers/HTML4Value.php deleted file mode 100644 index 528802df9..000000000 --- a/src/View/Parsers/HTML4Value.php +++ /dev/null @@ -1,31 +0,0 @@ -isValid()) { - $this->setDocument(null); - } - - $errorState = libxml_use_internal_errors(true); - $result = $this->getDocument()->loadHTML( - '' . "$content" - ); - libxml_clear_errors(); - libxml_use_internal_errors($errorState); - return $result; - } -} diff --git a/src/View/Parsers/HTMLValue.php b/src/View/Parsers/HTMLValue.php index 06ed6df34..ee44ec142 100644 --- a/src/View/Parsers/HTMLValue.php +++ b/src/View/Parsers/HTMLValue.php @@ -4,22 +4,20 @@ namespace SilverStripe\View\Parsers; use SilverStripe\Core\Convert; use SilverStripe\View\ViewableData; +use Masterminds\HTML5; use DOMNodeList; use DOMXPath; use DOMDocument; +use SilverStripe\View\HTML; /** * This class handles the converting of HTML fragments between a string and a DOMDocument based * representation. * - * It's designed to allow dependency injection to replace the standard HTML4 version with one that - * handles XHTML or HTML5 instead - * * @mixin DOMDocument */ -abstract class HTMLValue extends ViewableData +class HTMLValue extends ViewableData { - public function __construct($fragment = null) { if ($fragment) { @@ -28,7 +26,25 @@ abstract class HTMLValue extends ViewableData parent::__construct(); } - abstract public function setContent($fragment); + /** + * @param string $content + * @return bool + */ + public function setContent($content) + { + $content = preg_replace('#]*>#si', '', $content); + $html5 = new HTML5(['disable_html_ns' => true]); + $document = $html5->loadHTML( + '' . + "$content" + ); + if ($document) { + $this->setDocument($document); + return true; + } + $this->valid = false; + return false; + } /** * @return string diff --git a/src/View/Parsers/ShortcodeParser.php b/src/View/Parsers/ShortcodeParser.php index 41caf4c52..ed1cb858d 100644 --- a/src/View/Parsers/ShortcodeParser.php +++ b/src/View/Parsers/ShortcodeParser.php @@ -658,8 +658,8 @@ class ShortcodeParser // use a proper DOM list($content, $tags) = $this->replaceElementTagsWithMarkers($content); - /** @var HTMLValue $htmlvalue */ - $htmlvalue = Injector::inst()->create('HTMLValue', $content); + /** @var HTMLValue $htmlvalue */ + $htmlvalue = Injector::inst()->create(HTMLValue::class, $content); // Now parse the result into a DOM if (!$htmlvalue->isValid()) { diff --git a/tests/php/View/HTMLTest.php b/tests/php/View/HTMLTest.php index 9801e831f..362ec417f 100644 --- a/tests/php/View/HTMLTest.php +++ b/tests/php/View/HTMLTest.php @@ -14,7 +14,7 @@ class HTMLTest extends SapphireTest 'name' => 'description', 'content' => 'test tag', ]); - $this->assertEquals('', $tag); + $this->assertEquals('', $tag); } public function testEmptyAttributes() @@ -27,7 +27,7 @@ class HTMLTest extends SapphireTest 'disabled' => false, 'readonly' => true, ]); - $this->assertEquals('', $tag); + $this->assertEquals('', $tag); } public function testNormalTag() @@ -52,7 +52,7 @@ class HTMLTest extends SapphireTest 'alt' => '', ]); - $this->assertEquals('', $tag); + $this->assertEquals('', $tag); } public function testVoidContentError() diff --git a/tests/php/View/Parsers/HTML4ValueTest.php b/tests/php/View/Parsers/HTML4ValueTest.php deleted file mode 100644 index 833b10047..000000000 --- a/tests/php/View/Parsers/HTML4ValueTest.php +++ /dev/null @@ -1,98 +0,0 @@ -Enclosed Value

' - => '

Enclosed Value

', - '' - => '', - '

' - => '

', - '' - => '', - '/bodu>/body>' - => '/bodu>/body>' - ]; - - foreach ($invalid as $input => $expected) { - $value->setContent($input); - $this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be saved'); - } - } - - public function testUtf8Saving() - { - $value = new HTML4Value(); - - $value->setContent('

ö ß ā い 家

'); - $this->assertEquals('

ö ß ā い 家

', $value->getContent()); - } - - public function testInvalidHTMLTagNames() - { - $value = new HTML4Value(); - - $invalid = [ - '

', - '
', - '""\'\'\'"""\'""<<<>/' - ]; - - foreach ($invalid as $input) { - $value->setContent($input); - $this->assertEquals( - 'test-link', - $value->getElementsByTagName('a')->item(0)->getAttribute('href'), - 'Link data can be extracted from malformed HTML' - ); - } - } - - public function testMixedNewlines() - { - $value = new HTML4Value(); - - $value->setContent("

paragraph

\n
  • 1
  • \r\n
"); - $this->assertEquals( - "

paragraph

\n
  • 1
  • \n
", - $value->getContent(), - 'Newlines get converted' - ); - } - - public function testAttributeEscaping() - { - $value = new HTML4Value(); - - $value->setContent('
'); - $this->assertEquals('', $value->getContent(), "'[' character isn't escaped"); - - $value->setContent(''); - $this->assertEquals('', $value->getContent(), "'\"' character is escaped"); - } - - public function testGetContent() - { - $value = new HTML4Value(); - - $value->setContent('

This is valid

'); - $this->assertEquals('

This is valid

', $value->getContent(), "Valid content is returned"); - - $value->setContent('valid is false - // for instance if a content editor saves something really weird in a LiteralField - // we can manually get to this state via ->setInvalid() - $value->setInvalid(); - $this->assertEquals('', $value->getContent(), "Blank string is returned when invalid"); - } -} diff --git a/tests/php/View/Parsers/HTMLValueTest.php b/tests/php/View/Parsers/HTMLValueTest.php new file mode 100644 index 000000000..7ce889dd7 --- /dev/null +++ b/tests/php/View/Parsers/HTMLValueTest.php @@ -0,0 +1,163 @@ +Enclosed Value

a' => '

Enclosed Value

a

', + '' => '', + '

' => '

', + '' => '', + '/bodu>/body>' => '/bodu>/body>' + ]; + + foreach ($invalid as $input => $expected) { + $value->setContent($input); + $this->assertEquals($expected, $value->getContent(), 'Invalid HTML can be parsed'); + } + } + + public function testUtf8Saving() + { + $value = new HTMLValue(); + + $value->setContent('

ö ß ā い 家

'); + $this->assertEquals('

ö ß ā い 家

', $value->getContent()); + } + + public function testWhitespaceHandling() + { + $value = new HTMLValue(); + + $value->setContent('

'); + $this->assertEquals('

', $value->getContent()); + } + + public function testInvalidHTMLTagNames() + { + $value = new HTMLValue(); + + $invalid = [ + '

', + '
' + ]; + + foreach ($invalid as $input) { + $value->setContent($input); + + $this->assertEquals( + 'test-link', + $value->getElementsByTagName('a')->item(0)->getAttribute('href'), + 'Link data can be extraced from malformed HTML' + ); + } + } + + public function testMixedNewlines() + { + $value = new HTMLValue(); + + $value->setContent("

paragraph

\n
  • 1
  • \r\n
"); + $this->assertEquals( + "

paragraph

\n
  • 1
  • \n
", + $value->getContent(), + 'Newlines get converted' + ); + } + + public function testAttributeEscaping() + { + $value = new HTMLValue(); + + $value->setContent(''); + $this->assertEquals('', $value->getContent(), "'[' character isn't escaped"); + + $value->setContent(''); + $this->assertEquals('', $value->getContent(), "'\"' character is escaped"); + } + + public function testShortcodeValue() + { + ShortcodeParser::get('default')->register( + 'test_shortcode', + function () { + return 'bit of test shortcode output'; + } + ); + $content = DBHTMLText::create('Test', ['shortcodes' => true]) + ->setValue('

Some content with a [test_shortcode] and a
followed by an


in it.

') + ->forTemplate(); + $this->assertStringContainsString( + // hr is flow content, not phrasing content, so must be corrected to be outside the p tag. + '

Some content with a bit of test shortcode output and a
followed by an


in it.', + $content + ); + } + + public function testEntities() + { + $content = 'ampersand & test & link'; + $output = new HTMLValue($content); + $output = $output->getContent(); + $this->assertEquals( + 'ampersand & test & link', + $output + ); + } + + public function testShortcodeEntities() + { + ShortcodeParser::get('default')->register( + 'sitetree_link_test', + // A mildly stubbed copy from SilverStripe\CMS\Model\SiteTree::link_shortcode_handler + function ($arguments, $content = null, $parser = null) { + $link = Convert::raw2att('https://google.com/search?q=unit&test'); + if ($content) { + $link = sprintf('%s', $link, $parser->parse($content)); + } + return $link; + } + ); + $content = [ + '[sitetree_link_test,id=2]' => 'https://google.com/search?q=unit&test', + // the random [ triggers the shortcode parser, which seems to be where problems arise. + ' [ non shortcode link' => + ' [ non shortcode link', + '[sitetree_link_test,id=1]test link[/sitetree_link_test]' => + 'test link' + ]; + foreach ($content as $input => $expected) { + $output = DBHTMLText::create('Test', ['shortcodes' => true]) + ->setValue($input) + ->forTemplate(); + $this->assertEquals($expected, $output); + } + } + + public function testValidHTMLInNoscriptTags() + { + $value = new HTMLValue(); + + $noscripts = [ + '', + '', + '', + ]; + + foreach ($noscripts as $noscript) { + $value->setContent($noscript); + $this->assertEquals($noscript, $value->getContent(), 'Child tags are left untouched in noscript tags.'); + } + } +}