From 1ee01c39d43c3933aff8a9e2795a1dcb1b7cc5bb Mon Sep 17 00:00:00 2001 From: Hamish Friedlander Date: Fri, 22 Feb 2013 10:30:51 +1300 Subject: [PATCH] FIX ShortcodeParser producing bad output after escaped tag Also tightens up matching of shortcodes so we dont match on invalid shortcodes --- parsers/ShortcodeParser.php | 89 +++++++++++++++++---------- tests/parsers/ShortcodeParserTest.php | 37 ++++++++++- 2 files changed, 91 insertions(+), 35 deletions(-) diff --git a/parsers/ShortcodeParser.php b/parsers/ShortcodeParser.php index c14035839..79ea1a52d 100644 --- a/parsers/ShortcodeParser.php +++ b/parsers/ShortcodeParser.php @@ -144,15 +144,8 @@ class ShortcodeParser { 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'ol', 'output', 'p', 'pre', 'section', 'table', 'ul' ); - - private static $tagrx = '/ - <(?(?:"[^"]*"[\'"]*|\'[^\']*\'[\'"]*|[^\'">])+)> | # HTML Tag - skip attribute scoped tags - \[ (?\[.*?\]) \] | # Escaped block - \[ (?\w+) (?.*?) (?\/?) \] | # Opening tag - \[\/ (?\w+) \] # Closing tag -/x'; - - private static $attrrx = '/ + + private static $attrrx = ' ([^\s\/\'"=,]+) # Name \s* = \s* (?: @@ -160,8 +153,35 @@ class ShortcodeParser { (?:"([^"]+)") | # Value surrounded by " (\w+) # Bare value ) -/x'; +'; + + private static function attrrx() { + return '/'.self::$attrrx.'/xS'; + } + private static $tagrx = ' + # HTML Tag + <(?(?:"[^"]*"[\'"]*|\'[^\']*\'[\'"]*|[^\'">])+)> + + | # Opening tag + (?\[?) + \[ + (?\w+) + [\s,]* + (? (?: %s [\s,]*)* ) + \/?\] + (?\]?) + + | # Closing tag + \[\/ + (?\w+) + \] + (?\]?) +'; + + private static function tagrx() { + return '/'.sprintf(self::$tagrx, self::$attrrx).'/xS'; + } const WARN = 'warn'; const STRIP = 'strip'; @@ -181,21 +201,11 @@ class ShortcodeParser { * @return array - The list of tags found. When using an open/close pair, only one item will be in the array, * with "content" set to the text between the tags */ - protected function extractTags(&$content) { + protected function extractTags($content) { $tags = array(); - $escapes = array(); - if(preg_match_all(self::$tagrx, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) { + if(preg_match_all(self::tagrx(), $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) { foreach($matches as $match) { - // Record escaped tags - if (!empty($match['escaped'][0])) { - $escapes[] = array( - 's' => $match[0][1], - 'e' => $match[0][1] + strlen($match[0][0]), - 'content' => $match['escaped'] - ); - } - // Ignore any elements if (empty($match['open'][0]) && empty($match['close'][0])) continue; @@ -203,7 +213,7 @@ class ShortcodeParser { $attrs = array(); if (!empty($match['attrs'][0])) { - preg_match_all(self::$attrrx, $match['attrs'][0], $attrmatches, PREG_SET_ORDER); + preg_match_all(self::attrrx(), $match['attrs'][0], $attrmatches, PREG_SET_ORDER); foreach ($attrmatches as $attr) { list($whole, $name, $value) = array_values(array_filter($attr)); @@ -219,14 +229,15 @@ class ShortcodeParser { 'open' => @$match['open'][0], 'close' => @$match['close'][0], 'attrs' => $attrs, - 'content' => '' + 'content' => '', + 'escaped' => !empty($match['oesc'][0]) || !empty($match['cesc1'][0]) || !empty($match['cesc2'][0]) ); } } $i = count($tags); while($i--) { - if($tags[$i]['close']) { + if(!empty($tags[$i]['close'])) { // If the tag just before this one isn't the related opening tag, throw an error $err = null; @@ -244,21 +255,29 @@ class ShortcodeParser { if(self::$error_behavior == self::ERROR) user_error($err, E_USER_ERRROR); } else { + if ($tags[$i]['escaped']) { + if (!$tags[$i-1]['escaped']) { + $tags[$i]['e'] -= 1; + $tags[$i]['escaped'] = false; + } + } + else { + if ($tags[$i-1]['escaped']) { + $tags[$i-1]['s'] += 1; + $tags[$i-1]['escaped'] = false; + } + } + // Otherwise, grab content between tags, save in opening tag & delete the closing one $tags[$i-1]['text'] = substr($content, $tags[$i-1]['s'], $tags[$i]['e'] - $tags[$i-1]['s']); $tags[$i-1]['content'] = substr($content, $tags[$i-1]['e'], $tags[$i]['s'] - $tags[$i-1]['e']); $tags[$i-1]['e'] = $tags[$i]['e']; + unset($tags[$i]); } } } - $i = count($escapes); - while($i--) { - $escape = $escapes[$i]; - $content = substr_replace($content, $escape['content'], $escape['s'], $escape['e'] - $escape['s']); - } - return array_values($tags); } @@ -281,7 +300,13 @@ class ShortcodeParser { if ($li === null) $tail = substr($content, $tags[$i]['e']); else $tail = substr($content, $tags[$i]['e'], $li - $tags[$i]['e']); - $str = $generator($i, $tags[$i]). $tail . $str; + if ($tags[$i]['escaped']) { + $str = substr($content, $tags[$i]['s']+1, $tags[$i]['e'] - $tags[$i]['s'] - 2) . $tail . $str; + } + else { + $str = $generator($i, $tags[$i]) . $tail . $str; + } + $li = $tags[$i]['s']; } diff --git a/tests/parsers/ShortcodeParserTest.php b/tests/parsers/ShortcodeParserTest.php index 1d7a07848..3f571b484 100644 --- a/tests/parsers/ShortcodeParserTest.php +++ b/tests/parsers/ShortcodeParserTest.php @@ -100,9 +100,40 @@ class ShortcodeParserTest extends SapphireTest { } public function testShortcodeEscaping() { - $this->assertEquals('[test_shortcode]', $this->parser->parse('[[test_shortcode]]')); - $this->assertEquals('[test_shortcode]content[/test_shortcode]', - $this->parser->parse('[[test_shortcode]content[/test_shortcode]]')); + $this->assertEquals( + '[test_shortcode]', + $this->parser->parse('[[test_shortcode]]') + ); + + $this->assertEquals( + '[test_shortcode /]', + $this->parser->parse('[[test_shortcode /]]') + ); + + $this->assertEquals( + '[test_shortcode]content[/test_shortcode]', + $this->parser->parse('[[test_shortcode]content[/test_shortcode]]' + )); + + $this->assertEquals( + '[test_shortcode]content', + $this->parser->parse('[[test_shortcode]][test_shortcode]content[/test_shortcode]') + ); + + $this->assertEquals( + '[test_shortcode]content[/test_shortcode]content2', + $this->parser->parse('[[test_shortcode]content[/test_shortcode]][test_shortcode]content2[/test_shortcode]' + )); + + $this->assertEquals( + '[[Doesnt strip double [ character if not a shortcode', + $this->parser->parse('[[Doesnt strip double [ character if not a [test_shortcode]shortcode[/test_shortcode]' + )); + + $this->assertEquals( + '[[Doesnt shortcode get confused by double ]] characters', + $this->parser->parse('[[Doesnt [test_shortcode]shortcode[/test_shortcode] get confused by double ]] characters' + )); } public function testUnquotedArguments() {