FIX ShortcodeParser producing bad output after escaped tag

Also tightens up matching of shortcodes so we dont match on invalid shortcodes
This commit is contained in:
Hamish Friedlander 2013-02-22 10:30:51 +13:00
parent 8a70019e78
commit 1ee01c39d4
2 changed files with 91 additions and 35 deletions

View File

@ -145,14 +145,7 @@ class ShortcodeParser {
'pre', 'section', 'table', 'ul'
);
private static $tagrx = '/
<(?<element>(?:"[^"]*"[\'"]*|\'[^\']*\'[\'"]*|[^\'">])+)> | # HTML Tag - skip attribute scoped tags
\[ (?<escaped>\[.*?\]) \] | # Escaped block
\[ (?<open>\w+) (?<attrs>.*?) (?<selfclosed>\/?) \] | # Opening tag
\[\/ (?<close>\w+) \] # Closing tag
/x';
private static $attrrx = '/
private static $attrrx = '
([^\s\/\'"=,]+) # Name
\s* = \s*
(?:
@ -160,8 +153,35 @@ class ShortcodeParser {
(?:"([^"]+)") | # Value surrounded by "
(\w+) # Bare value
)
/x';
';
private static function attrrx() {
return '/'.self::$attrrx.'/xS';
}
private static $tagrx = '
# HTML Tag
<(?<element>(?:"[^"]*"[\'"]*|\'[^\']*\'[\'"]*|[^\'">])+)>
| # Opening tag
(?<oesc>\[?)
\[
(?<open>\w+)
[\s,]*
(?<attrs> (?: %s [\s,]*)* )
\/?\]
(?<cesc1>\]?)
| # Closing tag
\[\/
(?<close>\w+)
\]
(?<cesc2>\]?)
';
private static function tagrx() {
return '/'.sprintf(self::$tagrx, self::$attrrx).'/xS';
}
const WARN = 'warn';
const STRIP = 'strip';
@ -181,21 +201,11 @@ class ShortcodeParser {
* @return array - The list of tags found. When using an open/close pair, only one item will be in the array,
* with "content" set to the text between the tags
*/
protected function extractTags(&$content) {
protected function extractTags($content) {
$tags = array();
$escapes = array();
if(preg_match_all(self::$tagrx, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
if(preg_match_all(self::tagrx(), $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
foreach($matches as $match) {
// Record escaped tags
if (!empty($match['escaped'][0])) {
$escapes[] = array(
's' => $match[0][1],
'e' => $match[0][1] + strlen($match[0][0]),
'content' => $match['escaped']
);
}
// Ignore any elements
if (empty($match['open'][0]) && empty($match['close'][0])) continue;
@ -203,7 +213,7 @@ class ShortcodeParser {
$attrs = array();
if (!empty($match['attrs'][0])) {
preg_match_all(self::$attrrx, $match['attrs'][0], $attrmatches, PREG_SET_ORDER);
preg_match_all(self::attrrx(), $match['attrs'][0], $attrmatches, PREG_SET_ORDER);
foreach ($attrmatches as $attr) {
list($whole, $name, $value) = array_values(array_filter($attr));
@ -219,14 +229,15 @@ class ShortcodeParser {
'open' => @$match['open'][0],
'close' => @$match['close'][0],
'attrs' => $attrs,
'content' => ''
'content' => '',
'escaped' => !empty($match['oesc'][0]) || !empty($match['cesc1'][0]) || !empty($match['cesc2'][0])
);
}
}
$i = count($tags);
while($i--) {
if($tags[$i]['close']) {
if(!empty($tags[$i]['close'])) {
// If the tag just before this one isn't the related opening tag, throw an error
$err = null;
@ -244,21 +255,29 @@ class ShortcodeParser {
if(self::$error_behavior == self::ERROR) user_error($err, E_USER_ERRROR);
}
else {
if ($tags[$i]['escaped']) {
if (!$tags[$i-1]['escaped']) {
$tags[$i]['e'] -= 1;
$tags[$i]['escaped'] = false;
}
}
else {
if ($tags[$i-1]['escaped']) {
$tags[$i-1]['s'] += 1;
$tags[$i-1]['escaped'] = false;
}
}
// Otherwise, grab content between tags, save in opening tag & delete the closing one
$tags[$i-1]['text'] = substr($content, $tags[$i-1]['s'], $tags[$i]['e'] - $tags[$i-1]['s']);
$tags[$i-1]['content'] = substr($content, $tags[$i-1]['e'], $tags[$i]['s'] - $tags[$i-1]['e']);
$tags[$i-1]['e'] = $tags[$i]['e'];
unset($tags[$i]);
}
}
}
$i = count($escapes);
while($i--) {
$escape = $escapes[$i];
$content = substr_replace($content, $escape['content'], $escape['s'], $escape['e'] - $escape['s']);
}
return array_values($tags);
}
@ -281,7 +300,13 @@ class ShortcodeParser {
if ($li === null) $tail = substr($content, $tags[$i]['e']);
else $tail = substr($content, $tags[$i]['e'], $li - $tags[$i]['e']);
if ($tags[$i]['escaped']) {
$str = substr($content, $tags[$i]['s']+1, $tags[$i]['e'] - $tags[$i]['s'] - 2) . $tail . $str;
}
else {
$str = $generator($i, $tags[$i]) . $tail . $str;
}
$li = $tags[$i]['s'];
}

View File

@ -100,9 +100,40 @@ class ShortcodeParserTest extends SapphireTest {
}
public function testShortcodeEscaping() {
$this->assertEquals('[test_shortcode]', $this->parser->parse('[[test_shortcode]]'));
$this->assertEquals('[test_shortcode]content[/test_shortcode]',
$this->parser->parse('[[test_shortcode]content[/test_shortcode]]'));
$this->assertEquals(
'[test_shortcode]',
$this->parser->parse('[[test_shortcode]]')
);
$this->assertEquals(
'[test_shortcode /]',
$this->parser->parse('[[test_shortcode /]]')
);
$this->assertEquals(
'[test_shortcode]content[/test_shortcode]',
$this->parser->parse('[[test_shortcode]content[/test_shortcode]]'
));
$this->assertEquals(
'[test_shortcode]content',
$this->parser->parse('[[test_shortcode]][test_shortcode]content[/test_shortcode]')
);
$this->assertEquals(
'[test_shortcode]content[/test_shortcode]content2',
$this->parser->parse('[[test_shortcode]content[/test_shortcode]][test_shortcode]content2[/test_shortcode]'
));
$this->assertEquals(
'[[Doesnt strip double [ character if not a shortcode',
$this->parser->parse('[[Doesnt strip double [ character if not a [test_shortcode]shortcode[/test_shortcode]'
));
$this->assertEquals(
'[[Doesnt shortcode get confused by double ]] characters',
$this->parser->parse('[[Doesnt [test_shortcode]shortcode[/test_shortcode] get confused by double ]] characters'
));
}
public function testUnquotedArguments() {