From cbef44b8d633fa6151914258b14cc836a6cd2bff Mon Sep 17 00:00:00 2001
From: Hamish Friedlander
Date: Wed, 20 Feb 2013 10:35:06 +1300
Subject: [PATCH] FIX Make ShortcodeParser#parse passthrough not error on invalid HTML

---
 parsers/ShortcodeParser.php | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/parsers/ShortcodeParser.php b/parsers/ShortcodeParser.php
index fb28fa7b3..6d7e07bd0 100644
--- a/parsers/ShortcodeParser.php
+++ b/parsers/ShortcodeParser.php
@@ -456,6 +456,45 @@ class ShortcodeParser {
 		$this->removeNode($node);
 	}
 
+	/**
+	 * Parse an HTML fragment into DOM nodes with HTML5lib, treating PHP errors raised while parsing as a failure.
+	 *
+	 * @param string $html HTML fragment to parse (trimmed before being handed to the parser)
+	 * @return DOMNodeList|null Result of HTML5_Parser::parseFragment, or null if parsing raised an error
+	 */
+	protected function loadHTML($html) {
+		require_once(THIRDPARTY_PATH.'/html5lib/HTML5/Parser.php');
+
+		// Convert any errors to exceptions
+		set_error_handler(
+			function($no, $str){
+				// Leave errors silenced by @ (or otherwise masked out) to PHP's default handling
+				if (!(error_reporting() & $no)) return false;
+				throw new Exception("HTML Parse Error: ".$str);
+			},
+			error_reporting()
+		);
+
+		// Use HTML5lib to parse the HTML fragment
+		try {
+			$bases = HTML5_Parser::parseFragment(trim($html), 'div');
+		}
+		catch (Exception $e) {
+			$bases = null;
+		}
+		catch (Throwable $e) {
+			// PHP 7+ Error objects are not parse failures: put the previous handler back, then let them
+			// propagate. On PHP 5 the Throwable class does not exist, so this clause never matches.
+			restore_error_handler();
+			throw $e;
+		}
+
+		// Disable our error handler (restoring to previous value)
+		restore_error_handler();
+
+		return $bases;
+	}
+
 	/**
 	 * Parse a string, and replace any registered shortcodes within it with the result of the mapped callback.
* @@ -474,9 +499,8 @@ class ShortcodeParser { list($content, $tags) = $this->replaceElementTagsWithMarkers($content); // Now parse the result into a DOM - require_once(THIRDPARTY_PATH.'/html5lib/HTML5/Parser.php'); - $bases = HTML5_Parser::parseFragment(trim($content), 'div'); - + $bases = $this->loadHTML($content); + // If we couldn't parse the HTML, error out if (!$bases || !$bases->length) { if(self::$error_behavior == self::ERROR) { @@ -488,8 +512,8 @@ class ShortcodeParser { } $res = ''; - $html = $bases->item(0)->parentNode; - $doc = $html->ownerDocument; + $container = $bases->item(0)->parentNode; + $doc = $container->ownerDocument; $xp = new DOMXPath($doc); @@ -526,7 +550,7 @@ class ShortcodeParser { $this->replaceMarkerWithContent($shortcode, $tag); } - foreach($html->childNodes as $child) $res .= $doc->saveHTML($child); + foreach($container->childNodes as $child) $res .= $doc->saveHTML($child); return $res; }