BUGFIX: Updated HTTP::findByTagAndAttribute() to be more versatile, especially when dealing with attributes containing special characters.

From: Andrew Short <andrewjshort@gmail.com>

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@88469 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
Andrew Short 2009-10-11 00:06:55 +00:00 committed by Sam Minnee
parent bd5b134c37
commit 75b875ae39
2 changed files with 60 additions and 33 deletions

View File

@ -99,27 +99,33 @@ class HTTP {
$url = self::setGetVar($varname, $varvalue, $currentURL);
return Convert::xml2raw($url);
}
/**
 * Collect the values of the given attributes from matching tags in a block of markup.
 *
 * Values are gathered with preg_match_all() rather than preg_replace() with the "e" (eval) modifier:
 * that modifier evaluated text taken from $content as PHP code inside a double-quoted string (so a value
 * such as "$Journey" was interpolated instead of returned) and it no longer exists as of PHP 7.
 *
 * @param string $content the markup to search
 * @param array $attribs map of tag name to attribute name; a numeric key matches the attribute on any tag
 * @return array|null flat list of attribute values, or null when nothing matched
 */
static function findByTagAndAttribute($content, $attribs) {
	$items = array();

	foreach($attribs as $tag => $attrib) {
		// Escape both names so regex metacharacters (or the "/" delimiter) cannot break the pattern.
		$tagPrefix = is_numeric($tag) ? '' : preg_quote($tag, '/') . ' ';
		$attrib = preg_quote($attrib, '/');

		$regExps = array(
			// Double-quoted value.
			"/<{$tagPrefix}[^>]*{$attrib} *= *\"([^\"]*)\"/i",
			// Single-quoted value.
			"/<{$tagPrefix}[^>]*{$attrib} *= *'([^']*)'/i",
			// Unquoted value terminated by a space; "+" keeps a quoted value that follows "= " from
			// being reported a second time as an empty string.
			"/<{$tagPrefix}[^>]*{$attrib} *= *([^\"' ]+) /i",
		);

		foreach($regExps as $regExp) {
			if(preg_match_all($regExp, $content, $matches)) {
				$items = array_merge($items, $matches[1]);
			}
		}
	}

	return $items ? $items : null;
}
/**
 * Search for all tags with a specific attribute, then return the value of that attribute in a flat array.
 *
 * Two patterns are tried per tag/attribute pair: one for values wrapped in single or double quotes and one
 * for unquoted values. Results are appended pattern by pattern, so they are not guaranteed to be in the
 * order they appear in $content.
 *
 * @param string $content
 * @param array $attributes an array of tags to attributes, for example "[a] => 'href', [div] => 'id'"
 * @return array|null the attribute values found, or null when there are none
 */
public static function findByTagAndAttribute($content, $attributes) {
	$result = array();

	foreach($attributes as $tag => $attribute) {
		// Escape both names so regex metacharacters (or the "/" delimiter) cannot break the pattern.
		$tag = preg_quote((string) $tag, '/');
		$attribute = preg_quote((string) $attribute, '/');

		// "\s" accepts any whitespace after the tag name and around "="; the lookbehind requires the
		// attribute name to start after whitespace, so "data-href" is not mistaken for "href".
		$prefix = "<{$tag}\\s[^>]*(?<=\\s){$attribute}\\s*=\\s*";

		// Each pattern maps to the capture group that holds the attribute value.
		$patterns = array(
			"/{$prefix}([\"'])(.*?)\\1[^>]*>/i" => 2,
			"/{$prefix}([^\\s\"'>]+)/i" => 1,
		);

		foreach($patterns as $pattern => $group) {
			if(preg_match_all($pattern, $content, $matches)) {
				$result = array_merge($result, $matches[$group]);
			}
		}
	}

	// Callers receive null, not an empty array, when nothing was found.
	return count($result) ? $result : null;
}
/**
 * Look up the "href" attribute of "a" tags in the given content.
 *
 * @param string $content
 * @return mixed whatever self::findByTagAndAttribute() returns for the "a" => "href" lookup
 */
static function getLinksIn($content) {
	$anchorHref = array("a" => "href");

	return self::findByTagAndAttribute($content, $anchorHref);
}
@ -349,4 +355,4 @@ class HTTP {
}
?>
?>

View File

@ -10,17 +10,38 @@ class HTTPTest extends SapphireTest {
/**
* Tests {@link HTTP::getLinksIn()}
*/
public function testGetLinksIn() {
	// Fixture containing exactly two anchors with double-quoted href values.
	$html = '
<h2>My page</h2>
<p>A boy went <a href="home/">home</a> to see his <span><a href="mother/">mother</a></span>.</p>
';

	$found = HTTP::getLinksIn($html);

	// The result must be an array holding one entry per anchor.
	$isArray = is_array($found);
	$this->assertTrue($isArray);

	$hasTwoLinks = (count($found) == 2);
	$this->assertTrue($hasTwoLinks);
}
/**
 * Tests {@link HTTP::getLinksIn()} against quoted, unquoted, mixed-quote, upper-case and malformed
 * href attributes.
 */
public function testGetLinksIn() {
	$content = '
<h2><a href="/">My Cool Site</a></h2>
<p>
A boy went <a href="home/">home</a> to see his <span><a href="mother/">mother</a></span>. This
involved a short <a href="$Journey">journey</a>, as well as some <a href="space travel">space travel</a>
and <a href=unquoted>unquoted</a> events, as well as a <a href=\'single quote\'>single quote</a> from
his <a href="/father">father</a>.
</p>
<p>
There were also some elements with extra <a class=attribute href=\'attributes\'>attributes</a> which
played a part in his <a href=journey"extra id="JourneyLink">journey</a>. HE ALSO DISCOVERED THE
<A HREF="CAPS LOCK">KEY</a>. Later he got his <a href="quotes \'mixed\' up">mixed up</a>.
</p>
';

	$expected = array (
		'/', 'home/', 'mother/', '$Journey', 'space travel', 'unquoted', 'single quote', '/father', 'attributes',
		'journey', 'CAPS LOCK', 'quotes \'mixed\' up'
	);

	$result = HTTP::getLinksIn($content);

	// Check the type before sorting: sort() on a non-array would raise an error and hide the real failure.
	$this->assertTrue(is_array($result));

	// Results don't necessarily come out in the order they are in the $content param.
	sort($result);
	sort($expected);

	$this->assertEquals($expected, $result, 'Test that all links within the content are found.');
}
/**
* Tests {@link HTTP::setGetVar()}