mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 12:05:37 +00:00
MINOR: Tests for previous HTMLText#Summary and HTMLText#FirstSentence patch
BUGFIX: A couple of bugfixes on HTMLText#Summary and HTMLText#FirstSentence so the trickiest tests pass git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/branches/2.3@78728 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
parent
8aea22c283
commit
98a75735ee
@ -41,32 +41,39 @@ class HTMLText extends Text {
|
||||
/* First we need the text of the first paragraph, without tags. Try using SimpleXML first */
|
||||
if (class_exists('SimpleXMLElement')) {
|
||||
$doc = new DOMDocument();
|
||||
$doc->strictErrorChecking = FALSE;
|
||||
if ($doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $this->value)) {
|
||||
|
||||
/* Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe error */
|
||||
set_error_handler(create_function('$no, $str', 'throw new Exception("HTML Parse Error: ".$str);'), E_ALL);
|
||||
try { $res = $doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $this->value); }
|
||||
catch (Exception $e) { $res = false; }
|
||||
restore_error_handler();
|
||||
|
||||
if ($res) {
|
||||
$xml = simplexml_import_dom($doc);
|
||||
$res = $xml->xpath('//p');
|
||||
if (!empty($res)) $str = strip_tags($res[0]->asXML());
|
||||
}
|
||||
}
|
||||
|
||||
/* If that failed, most likely the passed HTML is broken. Use a simple regex + a custom, more brutal strip_tags. We don't use strip_tags because
|
||||
* that does very badly on broken HTML*/
|
||||
if (!$str) {
|
||||
/* If that failed, use a simple regex + a strip_tags. We look for the first paragraph with some words in it, not just the first paragraph.
|
||||
* Not as good on broken HTML, and doesn't understand escaping or cdata blocks, but will probably work on even very malformed HTML */
|
||||
if (preg_match('{<p[^>]*>(.*[A-Za-z]+.*)</p>}', $this->value, $matches)) {
|
||||
$str = strip_tags($matches[1]);
|
||||
}
|
||||
/* If _that_ failed, just use the whole text with strip_tags */
|
||||
else {
|
||||
$str = strip_tags($this->value);
|
||||
}
|
||||
/* See if we can pull a paragraph out*/
|
||||
if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}', $this->value, $matches)) $str = $matches[2];
|
||||
/* If _that_ failed, just use the whole text */
|
||||
else $str = $this->value;
|
||||
|
||||
/* Now pull out all the html-alike stuff */
|
||||
$str = preg_replace('{</?[a-zA-Z]+[^<>]*>}', '', $str); /* Take out anything that is obviously a tag */
|
||||
$str = preg_replace('{</|<|>}', '', $str); /* Strip out any left over looking bits. Textual < or > should already be encoded to &lt; or &gt; */
|
||||
}
|
||||
|
||||
/* Now split into words. If we are under the maxWords limit, just return the whole string */
|
||||
/* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for whitespace normalization) */
|
||||
$words = preg_split('/\s+/', $str);
|
||||
if ($maxWords == -1 || count($words) <= $maxWords) return $str;
|
||||
if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);
|
||||
|
||||
/* Otherwise work backwards looking for a sentence ending (we try to avoid abbreviations, but aren't very good at it) */
|
||||
for ($i = $maxWords; $i > $maxWords - $flex; $i--) {
|
||||
for ($i = $maxWords; $i >= $maxWords - $flex && $i >= 0; $i--) {
|
||||
if (preg_match('/\.$/', $words[$i]) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $words[$i])) {
|
||||
return implode(' ', array_slice($words, 0, $i+1));
|
||||
}
|
||||
|
@ -21,5 +21,86 @@ class HTMLTextTest extends SapphireTest {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks Summary() with its default arguments against simple, well-formed HTML.
 *
 * Each fixture is an (input HTML, expected summary) pair: headings are skipped,
 * inline tags are stripped, an unclosed <br> is tolerated and only the first
 * paragraph is returned.
 */
function testSummaryBasics() {
	$fixtures = array(
		array('<h1>Should not take header</h1><p>Should take paragraph</p>', 'Should take paragraph'),
		array('<p>Should strip <b>tags, but leave</b> text</p>', 'Should strip tags, but leave text'),
		array('<p>Unclosed tags <br>should not phase it</p>', 'Unclosed tags should not phase it'),
		array('<p>Second paragraph</p><p>should not cause errors or appear in output</p>', 'Second paragraph'),
	);

	foreach($fixtures as $fixture) {
		list($html, $expected) = $fixture;

		$field = new HTMLText('Test');
		$field->setValue($html);

		$this->assertEquals($expected, $field->Summary());
	}
}
|
||||
|
||||
/**
 * Checks how Summary(5, 3, '...') truncates paragraphs longer than the limit.
 *
 * The fixtures cover a plain cut-off, a cut-off through nested inline tags,
 * a cut at a sentence ending, and a sentence ending that is too early in the
 * text to be used as the cut point.
 */
function testSummaryLimits() {
	$fixtures = array(
		array('<p>A long paragraph should be cut off if limit is set</p>', 'A long paragraph should be...'),
		array('<p>No matter <i>how many <b>tags</b></i> are in it</p>', 'No matter how many tags...'),
		array('<p>A sentence is. nicer than hard limits</p>', 'A sentence is.'),
		array('<p>But not. If it\'s too short</p>', 'But not. If it\'s too...'),
	);

	foreach($fixtures as $fixture) {
		list($html, $expected) = $fixture;

		$field = new HTMLText('Test');
		$field->setValue($html);

		$this->assertEquals($expected, $field->Summary(5, 3, '...'));
	}
}
|
||||
|
||||
/**
 * Checks that the third argument of Summary() is appended verbatim to a
 * truncated summary, including when it is the empty string.
 */
function testSummaryEndings() {
	$html = '<p>Cut it off, cut it off</p>';
	$truncated = 'Cut it off, cut';
	$suffixes = array('...', ' -> more', '');

	foreach($suffixes as $suffix) {
		$field = new HTMLText();
		$field->setValue($html);

		$expected = $truncated . $suffix;
		$this->assertEquals($expected, $field->Summary(4, 0, $suffix));
	}
}
|
||||
|
||||
/**
 * Calls Summary(4, 10, '') - presumably a flex value larger than the word
 * limit - and checks that the text is still cut after four words.
 */
function testSummaryFlexTooBigShouldNotCauseError() {
	$field = new HTMLText();
	$field->setValue('<p>Cut it off, cut it off</p>');

	$summary = $field->Summary(4, 10, '');

	$this->assertEquals('Cut it off, cut', $summary);
}
|
||||
|
||||
/**
 * Checks Summary(4, 0, '') against badly malformed markup.
 *
 * The first input has stray angle brackets and mismatched tags around a real
 * paragraph; the second contains no paragraph at all.
 */
function testSummaryInvalidHTML() {
	$brokenMarkup = array(
		'It\'s got a <p<> tag, but<p junk true>This doesn\'t <a id="boo">make</b class="wa"> < ><any< sense</p>' => 'This doesn\'t make any',
		'This doesn\'t <a style="much horray= true>even</b> < ><have< a <i>p tag' => 'This doesn\'t even have'
	);

	foreach($brokenMarkup as $html => $expected) {
		$field = new HTMLText();
		$field->setValue($html);

		$summary = $field->Summary(4, 0, '');
		$this->assertEquals($expected, $summary);
	}
}
|
||||
|
||||
/**
 * Checks that FirstSentence() returns the first sentence of the first
 * paragraph: headings are ignored, "Mr." does not end the sentence, and a
 * sentence of more than a hundred words is returned in full.
 */
function testFirstSentence() {
	// 100 repetitions of "many " to build a very long first sentence.
	$many = str_repeat('many ', 100);

	$expectedByInput = array(
		'<h1>should ignore</h1><p>First sentence. Second sentence.</p>' => 'First sentence.',
		'<h1>should ignore</h1><p>First Mr. sentence. Second sentence.</p>' => 'First Mr. sentence.',
		"<h1>should ignore</h1><p>Sentence with {$many}words. Second sentence.</p>" => "Sentence with {$many}words.",
	);

	foreach($expectedByInput as $html => $expected) {
		$field = new HTMLText();
		$field->setValue($html);

		$sentence = $field->FirstSentence();
		$this->assertEquals($expected, $sentence);
	}
}
|
||||
}
|
||||
?>
|
Loading…
x
Reference in New Issue
Block a user