mirror of
synced 2024-10-22 12:05:37 +00:00
ContextSummary() was cutting the HTML added by nl2br, because it expects plain text while it strips and replaces text. This fix changes the behaviour to apply nl2br after the text changes have been made. That way we can't cut anything in the middle of an HTML tag, but new lines or paragraphs are still replaced by <br> tags afterwards, should they exist. - Added tests to ensure text is not cut in the middle of a sentence. - Added a test to ensure that <br> tags are added in the correct place should the summary span across new lines.
639 lines
22 KiB
639 lines
22 KiB
namespace SilverStripe\ORM\Tests;
use SilverStripe\Control\Director;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\ORM\FieldType\DBHTMLText;
use SilverStripe\ORM\FieldType\DBField;
use SilverStripe\Core\Convert;
use SilverStripe\Core\Config\Config;
use SilverStripe\Dev\SapphireTest;
use SilverStripe\ORM\Tests\DBHTMLTextTest\TestShortcode;
use SilverStripe\View\Parsers\ShortcodeParser;
class DBHTMLTextTest extends SapphireTest
private $previousLocaleSetting = null;
// Per-test fixture: clears the remembered locale and registers the
// 'test_shortcode' handler (TestShortcode::handle_shortcode).
// NOTE(review): the object that ->register() is called on is not present in
// this copy of the file, and the method braces are missing as well — confirm
// against the upstream source before relying on this listing.
protected function setUp()
// clear the previous locale setting
$this->previousLocaleSetting = null;
// Set test handler
->register('test_shortcode', array(TestShortcode::class, 'handle_shortcode'));
/**
 * Puts back the LC_CTYPE locale that a test recorded in
 * $previousLocaleSetting, then hands over to the parent teardown.
 */
protected function tearDown()
{
    // If a test sets the locale, reset it on teardown
    if ($this->previousLocaleSetting) {
        setlocale(LC_CTYPE, $this->previousLocaleSetting);
    }

    // Chain to the base test case so it can run its own cleanup
    parent::tearDown();
}
/**
 * Data sets for {@link testLimitCharacters()}.
 *
 * Each entry is [original value, expected template output].
 *
 * @return array
 */
public function providerLimitCharacters()
{
    $cases = [];

    // HTML characters are stripped safely
    $cases[] = ['The little brown fox jumped over the lazy cow.', 'The little brown fox...'];
    $cases[] = ['<p>Short & Sweet</p>', 'Short & Sweet'];
    $cases[] = ['This text contains & in it', 'This text contains &...'];

    return $cases;
}
/**
 * Runs LimitCharacters on an HTMLFragment field and compares the template
 * output with the expected value.
 *
 * @dataProvider providerLimitCharacters
 * @param string $originalValue
 * @param string $expectedValue
 */
public function testLimitCharacters($originalValue, $expectedValue)
{
    $actual = DBField::create_field('HTMLFragment', $originalValue)
        ->obj('LimitCharacters')
        ->forTemplate();

    $this->assertEquals($expectedValue, $actual);
}
/**
 * Data sets for {@link testLimitCharactersToClosestWord()}.
 *
 * Each entry is [raw input, character limit, expected template output].
 *
 * @return array
 */
public function providerLimitCharactersToClosestWord()
{
    // HTML is converted safely to plain text
    $cases = [];

    // Standard words limited, ellipsis added if truncated
    $cases[] = ['<p>Lorem ipsum dolor sit amet</p>', 24, 'Lorem ipsum dolor sit...'];

    // Complete words less than the character limit don't get truncated, ellipsis not added
    $cases[] = ['<p>Lorem ipsum</p>', 24, 'Lorem ipsum'];
    $cases[] = ['<p>Lorem</p>', 24, 'Lorem'];
    $cases[] = ['', 24, '']; // No words produces nothing!

    // Special characters are encoded safely
    $cases[] = ['Nice & Easy', 24, 'Nice & Easy'];

    // HTML is safely converted to plain text
    $cases[] = ['<p>Lorem ipsum dolor sit amet</p>', 24, 'Lorem ipsum dolor sit...'];
    $cases[] = ['<p><span>Lorem ipsum dolor sit amet</span></p>', 24, 'Lorem ipsum dolor sit...'];
    $cases[] = ['<p>Lorem ipsum</p>', 24, 'Lorem ipsum'];
    $cases[] = ['Lorem & ipsum dolor sit amet', 24, 'Lorem & ipsum dolor sit...'];

    return $cases;
}
/**
 * Runs LimitCharactersToClosestWord with the given limit on an HTMLFragment
 * field and compares the template output with the expected value.
 *
 * @dataProvider providerLimitCharactersToClosestWord
 * @param string $originalValue Raw string input
 * @param int $limit
 * @param string $expectedValue Expected template value
 */
public function testLimitCharactersToClosestWord($originalValue, $limit, $expectedValue)
{
    $field = DBField::create_field('HTMLFragment', $originalValue);
    $actual = $field->obj('LimitCharactersToClosestWord', [$limit])->forTemplate();

    $this->assertEquals($expectedValue, $actual);
}
// Data sets consumed by testSummary($originalValue, $limit, $expectedValue).
// NOTE(review): only the input and expected strings are visible below; the
// $limit value and the brackets separating each data set are not present in
// this copy of the file — confirm against the upstream source.
public function providerSummary()
return [
'<p>Should strip <b>tags, but leave</b> text</p>',
'Should strip tags, but leave text',
// Line breaks are preserved
'<p>Unclosed tags <br>should not phase it</p>',
"Unclosed tags <br />\nshould not phase it",
// Paragraphs converted to linebreak
'<p>Second paragraph</p><p>should not cause errors or appear in output</p>',
"Second paragraph<br />\n<br />\nshould not cause errors or appear in output",
'<img src="hello" /><p>Second paragraph</p><p>should not cause errors or appear in output</p>',
"Second paragraph<br />\n<br />\nshould not cause errors or appear in output",
' <img src="hello" /><p>Second paragraph</p><p>should not cause errors or appear in output</p>',
"Second paragraph<br />\n<br />\nshould not cause errors or appear in output",
'<p><img src="remove me">example <img src="include me">text words hello<img src="hello"></p>',
'example text words hello',
// Shorter limits
'<p>A long paragraph should be cut off if limit is set</p>',
'A long paragraph should be...',
'<p>No matter <i>how many <b>tags</b></i> are in it</p>',
'No matter how many tags...',
'<p>A sentence is. nicer than hard limits</p>',
'A sentence is.',
/**
 * Runs Summary with the given limit on an HTMLFragment field and compares
 * the template output with the expected value.
 *
 * @dataProvider providerSummary
 * @param string $originalValue
 * @param int $limit
 * @param string $expectedValue
 */
public function testSummary($originalValue, $limit, $expectedValue)
{
    $actual = DBField::create_field('HTMLFragment', $originalValue)
        ->obj('Summary', [$limit])
        ->forTemplate();

    $this->assertEquals($expectedValue, $actual);
}
// For each suffix in $cases, summarises $orig with a limit of 4 and expects
// $match followed by the XML-escaped suffix (Convert::raw2xml).
// NOTE(review): the $cases array is not closed and may be missing entries in
// this copy of the file — confirm against the upstream source.
public function testSummaryEndings()
$cases = array(
' -> more',
$orig = '<p>Cut it off, cut it off</p>';
$match = 'Cut it off, cut';
foreach ($cases as $add) {
$textObj = DBField::create_field('HTMLFragment', $orig);
$result = $textObj->obj('Summary', [4, $add])->forTemplate();
$this->assertEquals($match . Convert::raw2xml($add), $result);
/**
 * Data sets for {@link testFirstSentence()}.
 *
 * Each entry is [original value, expected first sentence].
 *
 * @return array
 */
public function providerFirstSentence()
{
    $cases = [];

    // Same behaviour as DBTextTest
    $cases[] = ['', ''];
    $cases[] = ['First sentence.', 'First sentence.'];
    $cases[] = ['First sentence. Second sentence', 'First sentence.'];
    $cases[] = ['First sentence? Second sentence', 'First sentence?'];
    $cases[] = ['First sentence! Second sentence', 'First sentence!'];

    // DBHTMLText strips HTML first
    $cases[] = ['<br />First sentence.', 'First sentence.'];
    $cases[] = ['<p>First sentence. Second sentence. Third sentence</p>', 'First sentence.'];

    return $cases;
}
/**
 * Runs FirstSentence on an HTMLFragment field and compares the template
 * output with the expected value.
 *
 * @dataProvider providerFirstSentence
 * @param string $originalValue
 * @param string $expectedValue
 */
public function testFirstSentence($originalValue, $expectedValue)
{
    $field = DBField::create_field('HTMLFragment', $originalValue);
    $actual = $field->obj('FirstSentence')->forTemplate();

    $this->assertEquals($expectedValue, $actual);
}
// Creates HTMLFragment fields through the Injector using inline option
// specs and checks the resulting whitelist / shortcode settings.
public function testCreate()
/** @var DBHTMLText $field */
// Whitelist given as a single tag name
$field = Injector::inst()->create("HTMLFragment(['whitelist' => 'link'])", 'MyField');
$this->assertEquals(['link'], $field->getWhitelist());
// Whitelist given as a comma-separated string
$field = Injector::inst()->create("HTMLFragment(['whitelist' => 'link,a'])", 'MyField');
$this->assertEquals(['link', 'a'], $field->getWhitelist());
// Whitelist given as an array
$field = Injector::inst()->create("HTMLFragment(['whitelist' => ['link', 'a']])", 'MyField');
$this->assertEquals(['link', 'a'], $field->getWhitelist());
// NOTE(review): the field created on the next line is overwritten before
// anything is asserted on it; an assertion may be missing from this copy of
// the file — confirm against the upstream source.
$field = Injector::inst()->create("HTMLFragment", 'MyField');
// Test shortcodes
$field = Injector::inst()->create("HTMLFragment(['shortcodes' => true])", 'MyField');
$this->assertEquals(true, $field->getProcessShortcodes());
$field = Injector::inst()->create("HTMLFragment(['shortcodes' => false])", 'MyField');
$this->assertEquals(false, $field->getProcessShortcodes());
// Mix options
$field = Injector::inst()->create("HTMLFragment(['shortcodes' => true, 'whitelist' => ['a'])", 'MyField');
$this->assertEquals(true, $field->getProcessShortcodes());
$this->assertEquals(['a'], $field->getWhitelist());
// Data sets consumed by testToPlain($html, $plain): HTML input followed by
// the expected Plain() output.
// NOTE(review): the brackets separating each data set and the expected value
// for the final input are not present in this copy of the file — confirm
// against the upstream source.
public function providerToPlain()
return [
'<p><img />Lots of <strong>HTML <i>nested</i></strong> tags',
'Lots of HTML nested tags',
'<p>Multi</p><p>Paragraph<br>Also has multilines.</p>',
"Multi\n\nParagraph\nAlso has multilines.",
'<p>Collapses</p><p></p><p>Excessive<br/><br /><br>Newlines</p>',
/**
 * Compares Plain() on an HTMLFragment field with the expected plain text.
 *
 * @dataProvider providerToPlain
 * @param string $html
 * @param string $plain
 */
public function testToPlain($html, $plain)
{
    /** @var DBHTMLText $field */
    $field = DBField::create_field('HTMLFragment', $html);
    $actual = $field->Plain();

    $this->assertEquals($plain, $actual);
}
* Each test case is in the format: input, character limit, highlight, expected output
* @return array
// Data sets consumed by
// testContextSummary($originalValue, $limit, $keywords, $expectedValue).
// NOTE(review): most data sets below show only some of their four values,
// and the brackets separating the data sets are not present in this copy of
// the file — confirm against the upstream source.
public function providerContextSummary()
return [
'This is some text. It is a test',
'... text. It is a <mark>test</mark>'
// Retains case of original string
'This is some test text. Test test what if you have multiple keywords.',
'some test',
'This is <mark>some</mark> <mark>test</mark> text.'
. ' <mark>Test</mark> <mark>test</mark> what if you have...'
'Here is some text & HTML included',
'... text & <mark>HTML</mark> inc...'
'A dog ate a cat while looking at a Foobar',
// test that it does not highlight too much (eg every a)
'A dog ate a cat while looking at a Foobar',
'A dog ate a cat while looking at a Foobar',
// it should highlight 3 letters or more.
'A dog <mark>ate</mark> a cat while looking at a Foobar',
// HTML Content is plain-textified, and incorrect tags removed
'<p>A dog ate a cat while <mark>looking</mark> at a Foobar</p>',
// it should highlight 3 letters or more.
'A dog <mark>ate</mark> a cat while looking at a Foobar',
// The next two inputs span two paragraphs; their string literals contain a
// real line break, so the text must stay exactly as written.
'<p>This is a lot of text before this but really, this is a test sentence</p>
<p>with about more stuff after the line break</p>',
'... really, this is a <mark>test</mark> sentence...'
'<p>This is a lot of text before this but really, this is a test sentence</p>
<p>with about more stuff after the line break</p>',
'... sentence<br />
<br />
<mark>with</mark> about more stuff...'
/**
 * Runs ContextSummary with the given limit and keywords on an HTMLFragment
 * field and compares the template output with the expected value.
 *
 * @dataProvider providerContextSummary
 * @param string $originalValue Input
 * @param int $limit Number of characters
 * @param string $keywords Keywords to highlight
 * @param string $expectedValue Expected output (XML encoded safely)
 */
public function testContextSummary($originalValue, $limit, $keywords, $expectedValue)
{
    $field = DBField::create_field('HTMLFragment', $originalValue);
    $actual = $field->obj('ContextSummary', [$limit, $keywords])->forTemplate();

    $this->assertEquals($expectedValue, $actual);
}
// Asserts that RAW() returns the stored value unchanged.
// NOTE(review): the two cases below are character-for-character identical in
// this copy of the file; presumably one of them originally used an
// entity-encoded ampersand — confirm against the upstream source.
public function testRAW()
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This & This', $data->RAW());
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This & This', $data->RAW());
// Asserts the output of XML() for an HTMLFragment value containing an
// ampersand.
// NOTE(review): as shown, both cases use the same input but expect different
// output, so they cannot both pass. It looks like HTML entities in the string
// literals were decoded in this copy of the file; which literal was altered
// cannot be determined from here — confirm against the upstream source.
public function testXML()
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This & This', $data->XML());
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This &amp; This', $data->XML());
// Asserts the output of HTML() for an HTMLFragment value containing an
// ampersand.
// NOTE(review): as shown, both cases use the same input but expect different
// output, so they cannot both pass. It looks like HTML entities in the string
// literals were decoded in this copy of the file; which literal was altered
// cannot be determined from here — confirm against the upstream source.
public function testHTML()
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This & This', $data->HTML());
$data = DBField::create_field('HTMLFragment', 'This & This');
$this->assertEquals('This &amp; This', $data->HTML());
// Asserts the output of JS() for an HTMLText value containing double quotes
// and an ampersand.
// NOTE(review): the expected value contains "\x26amp;" while the input shows
// a bare "&"; the input literal may have lost its entity encoding in this
// copy of the file — confirm against the upstream source.
public function testJS()
$data = DBField::create_field('HTMLText', '"this is & test"');
$this->assertEquals('\"this is \x26amp; test\"', $data->JS());
// Asserts the output of ATT() for both an HTMLFragment and an HTMLText value
// containing double quotes.
// NOTE(review): as shown, every expected value equals its input; the literals
// may have lost entity encoding in this copy of the file — confirm against
// the upstream source.
public function testATT()
// HTML Fragment
$data = DBField::create_field('HTMLFragment', '"this is a test"');
$this->assertEquals('"this is a test"', $data->ATT());
// HTML Text (passes shortcodes + tidy)
$data = DBField::create_field('HTMLText', '"');
$this->assertEquals('"', $data->ATT());
// Builds a field whose content contains the [test_shortcode] tag and lists
// the expected outputs in which the tag has been replaced by
// "shortcode content".
// NOTE(review): the remaining create_field() arguments and the assertion
// calls that pair each expected string with a method are not present in this
// copy of the file — confirm against the upstream source.
public function testShortcodesProcessed()
* @var DBHTMLText $obj
$obj = DBField::create_field(
'<p>Some content <strong>[test_shortcode]</strong> with shortcode</p>'
// Basic DBField methods process shortcodes
'Some content shortcode content with shortcode',
'<p>Some content <strong>shortcode content</strong> with shortcode</p>',
'<p>Some content <strong>shortcode content</strong> with shortcode</p>',
'<p>Some content <strong>shortcode content</strong> with shortcode</p>',
// Test summary methods
'Some content shortcode...',
'Some content shortcode content with shortcode',
'Some content shortco...',
// NOTE(review): only the construction of the field is visible here; the
// statements that set values and assert on them are not present in this copy
// of the file — confirm against the upstream source.
function testExists()
$h = new DBHTMLText();
// Exercises whitelistContent() with two whitelists: 'meta,link' and
// 'meta,link,text()'. Each group below is: expected output, actual call,
// assertion message.
// NOTE(review): the assertion calls wrapping each group are not present in
// this copy of the file — confirm against the upstream source.
function testWhitelist()
$textObj = new DBHTMLText('Test', ['whitelist'=> 'meta,link']);
'<meta content="Keep"><link href="Also Keep">',
$textObj->whitelistContent('<meta content="Keep"><p>Remove</p><link href="Also Keep" />Remove Text'),
'Removes any elements not in whitelist excluding text elements'
$textObj = new DBHTMLText('Test', ['whitelist'=> 'meta,link,text()']);
'<meta content="Keep"><link href="Also Keep">Keep Text',
$textObj->whitelistContent('<meta content="Keep"><p>Remove</p><link href="Also Keep" />Keep Text'),
'Removes any elements not in whitelist including text elements'
// Checks that RAW() and string casting replace [shortcode] with the handler
// output on an HTMLText field, and leave it untouched on a field configured
// with 'shortcodes' => false.
// NOTE(review): the call that attaches the closure to $parser and the
// construction of the second field are truncated in this copy of the file —
// confirm against the upstream source.
public function testShortCodeParsedInRAW()
$parser = ShortcodeParser::get('HTMLTextTest');
function ($arguments, $content, $parser, $tagName, $extra) {
return 'replaced';
* @var DBHTMLText $field
$field = DBField::create_field('HTMLText', '<p>[shortcode]</p>');
$this->assertEquals('<p>replaced</p>', $field->RAW());
$this->assertEquals('<p>replaced</p>', (string)$field);
'shortcodes' => false,
$this->assertEquals('<p>[shortcode]</p>', $field->RAW());
$this->assertEquals('<p>[shortcode]</p>', (string)$field);
// Lists expected outputs of an HTMLText field whose [shortcode] tag is
// replaced by text containing a link to "home", including after
// 'alternate_base_url' is set to http://example.com/.
// NOTE(review): the call that attaches the closure to $parser and most of the
// assertion calls pairing each expected string with a method are not present
// in this copy of the file — confirm against the upstream source.
public function testShortCodeParsedInTemplateHelpers()
$parser = ShortcodeParser::get('HTMLTextTest');
function ($arguments, $content, $parser, $tagName, $extra) {
return 'Replaced short code with this. <a href="home">home</a>';
* @var DBHTMLText $field
$field = DBField::create_field('HTMLText', '<p>[shortcode]</p>');
'<p>Replaced short code with this. <a href="home">home</a></p>',
'<p>Replaced short code with this. <a href="home">home</a></p>',
'<p>Replaced short code with this. <a href="home">home</a></p>',
'\x3cp\x3eReplaced short code with this. \x3ca href=\"home\"\x3ehome\x3c/a\x3e\x3c/p\x3e',
'<p>Replaced short code with this. <a href="home">home</a></p>',
'<p>Replaced short code with this. <a href="home">home</a></p>',
$field->LimitCharacters(4, '...')
$field->LimitCharactersToClosestWord(10, '...')
$field->LimitWordCount(1, '...')
'<p>replaced short code with this. <a href="home">home</a></p>',
'Replaced short code with this. home',
Director::config()->set('alternate_base_url', 'http://example.com/');
'<p>Replaced short code with this. <a href="http://example.com/home">home</a></p>',
'Replaced short code with this.',
'Replaced short code with this.',
'Replaced short...',
'Replaced short code with this. home',
'Replaced <mark>short</mark> <mark>code</mark> with this. home',
$field->ContextSummary(500, 'short code')
// Tries to switch LC_CTYPE to a UTF-8 locale, then checks that
// FirstSentence() and Summary() return valid UTF-8.
// NOTE(review): in this copy of the file the loop has no visible exit or
// closing brace, and $problematicText is never passed to $textObj — lines
// appear to be missing; confirm against the upstream source.
public function testValidUtf8()
// Install a UTF-8 locale
// Remember the current locale so tearDown() can restore it
$this->previousLocaleSetting = setlocale(LC_CTYPE, 0);
$locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8');
$localeInstalled = false;
foreach ($locales as $locale) {
// The result of setlocale() is kept for the missing-locale check below
if ($localeInstalled = setlocale(LC_CTYPE, $locale)) {
// If the system doesn't have any of the UTF-8 locales, exit early
if ($localeInstalled === false) {
$this->markTestIncomplete('Unable to run this test because of missing locale!');
$problematicText = html_entity_decode('<p>This is a Test with non-breaking space!</p>', ENT_COMPAT, 'UTF-8');
$textObj = new DBHTMLText('Test');
$this->assertTrue(mb_check_encoding($textObj->FirstSentence(), 'UTF-8'));
$this->assertTrue(mb_check_encoding($textObj->Summary(), 'UTF-8'));