mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
BUGFIX: Updated HTTP::findByTagAndAttribute() to be more versatile, especially when dealing with attributes containing special characters.
From: Andrew Short <andrewjshort@gmail.com> git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@88469 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
parent
bd5b134c37
commit
75b875ae39
@ -99,27 +99,33 @@ class HTTP {
|
|||||||
$url = self::setGetVar($varname, $varvalue, $currentURL);
|
$url = self::setGetVar($varname, $varvalue, $currentURL);
|
||||||
return Convert::xml2raw($url);
|
return Convert::xml2raw($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
static function findByTagAndAttribute($content, $attribs) {
|
/**
|
||||||
$regExps = array();
|
* Search for all tags with a specific attribute, then return the value of that attribute in a flat array.
|
||||||
|
*
|
||||||
foreach($attribs as $tag => $attrib) {
|
* @param string $content
|
||||||
$tagPrefix = (is_numeric($tag)) ? '' : "$tag ";
|
* @param array $attributes an array of tags to attributes, for example "[a] => 'href', [div] => 'id'"
|
||||||
|
* @return array
|
||||||
$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *\")([^\"]*)(\")/ie";
|
*/
|
||||||
$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *')([^']*)(')/ie";
|
public static function findByTagAndAttribute($content, $attributes) {
|
||||||
$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *)([^\"' ]*)( )/ie";
|
$regexes = array();
|
||||||
}
|
|
||||||
|
foreach($attributes as $tag => $attribute) {
|
||||||
if($regExps) {
|
$regexes[] = "/<{$tag} [^>]*$attribute *= *([\"'])(.*?)\\1[^>]*>/i";
|
||||||
foreach($regExps as $regExp) {
|
$regexes[] = "/<{$tag} [^>]*$attribute *= *([^ \"'>]+)/i";
|
||||||
$content = preg_replace($regExp, '$items[] = "$2"', $content);
|
}
|
||||||
}
|
|
||||||
}
|
$result = array();
|
||||||
|
|
||||||
return isset($items) ? $items : null;
|
if($regexes) foreach($regexes as $regex) {
|
||||||
}
|
if(preg_match_all($regex, $content, $matches)) {
|
||||||
|
$result = array_merge_recursive($result, (isset($matches[2]) ? $matches[2] : $matches[1]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count($result) ? $result : null;
|
||||||
|
}
|
||||||
|
|
||||||
static function getLinksIn($content) {
|
static function getLinksIn($content) {
|
||||||
return self::findByTagAndAttribute($content, array("a" => "href"));
|
return self::findByTagAndAttribute($content, array("a" => "href"));
|
||||||
}
|
}
|
||||||
@ -349,4 +355,4 @@ class HTTP {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@ -10,17 +10,38 @@ class HTTPTest extends SapphireTest {
|
|||||||
/**
|
/**
|
||||||
* Tests {@link HTTP::getLinksIn()}
|
* Tests {@link HTTP::getLinksIn()}
|
||||||
*/
|
*/
|
||||||
public function testGetLinksIn() {
|
public function testGetLinksIn() {
|
||||||
$content = '
|
$content = '
|
||||||
<h2>My page</h2>
|
<h2><a href="/">My Cool Site</a></h2>
|
||||||
<p>A boy went <a href="home/">home</a> to see his <span><a href="mother/">mother</a></span>.</p>
|
|
||||||
';
|
<p>
|
||||||
|
A boy went <a href="home/">home</a> to see his <span><a href="mother/">mother</a></span>. This
|
||||||
$links = HTTP::getLinksIn($content);
|
involved a short <a href="$Journey">journey</a>, as well as some <a href="space travel">space travel</a>
|
||||||
|
and <a href=unquoted>unquoted</a> events, as well as a <a href=\'single quote\'>single quote</a> from
|
||||||
$this->assertTrue(is_array($links));
|
his <a href="/father">father</a>.
|
||||||
$this->assertTrue(count($links) == 2);
|
</p>
|
||||||
}
|
|
||||||
|
<p>
|
||||||
|
There were also some elements with extra <a class=attribute href=\'attributes\'>attributes</a> which
|
||||||
|
played a part in his <a href=journey"extra id="JourneyLink">journey</a>. HE ALSO DISCOVERED THE
|
||||||
|
<A HREF="CAPS LOCK">KEY</a>. Later he got his <a href="quotes \'mixed\' up">mixed up</a>.
|
||||||
|
</p>
|
||||||
|
';
|
||||||
|
|
||||||
|
$expected = array (
|
||||||
|
'/', 'home/', 'mother/', '$Journey', 'space travel', 'unquoted', 'single quote', '/father', 'attributes',
|
||||||
|
'journey', 'CAPS LOCK', 'quotes \'mixed\' up'
|
||||||
|
);
|
||||||
|
|
||||||
|
$result = HTTP::getLinksIn($content);
|
||||||
|
|
||||||
|
// Results don't necessarily come out in the order they are in the $content param.
|
||||||
|
sort($result);
|
||||||
|
sort($expected);
|
||||||
|
|
||||||
|
$this->assertTrue(is_array($result));
|
||||||
|
$this->assertEquals($expected, $result, 'Test that all links within the content are found.');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests {@link HTTP::setGetVar()}
|
* Tests {@link HTTP::setGetVar()}
|
||||||
|
Loading…
Reference in New Issue
Block a user