mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #11100 from creative-commoners/pulls/5/split-summary-by-punctuation
ENH Split sentences by configurable punctuation for summary
This commit is contained in:
commit
b1a1d4b951
@ -37,6 +37,11 @@ class DBText extends DBString
|
||||
'Summary' => 'Text',
|
||||
];
|
||||
|
||||
/**
|
||||
* Punctuation that marks an end of a sentence for the Summary() method.
*
* Summary() implodes these values, escapes them with preg_quote() and places
* them inside a single regex character class used in a lookbehind, so the
* text is split directly after any of these characters and the punctuation
* stays attached to the sentence it ends. Defaults to '.', '?' and '!'.
*
* NOTE(review): because all values end up in one character class, an entry
* is effectively matched one character at a time; a multi-character entry
* such as '...' would not act as a single separator.
* NOTE(review): the split pattern appears to be built without the `u`
* modifier, so multi-byte separators may be matched byte-wise; confirm
* before configuring non-ASCII punctuation.
|
||||
*/
|
||||
private static array $summary_sentence_separators = ['.', '?', '!'];
|
||||
|
||||
/**
|
||||
* (non-PHPdoc)
|
||||
* @see DBField::requireField()
|
||||
@ -130,10 +135,18 @@ class DBText extends DBString
|
||||
$add = $this->defaultEllipsis();
|
||||
}
|
||||
|
||||
// Split on sentences (don't remove period)
|
||||
$sentences = array_filter(array_map(function ($str) {
|
||||
return trim($str ?? '');
|
||||
}, preg_split('@(?<=\.)@', $value ?? '') ?: []));
|
||||
// Split on sentences (don't remove punctuation)
|
||||
$summarySentenceSeparators = preg_quote(implode(static::config()->get('summary_sentence_separators')), '@');
|
||||
$possibleSentences = preg_split('@(?<=[' . $summarySentenceSeparators . '])@', $value ?? '') ?: [];
|
||||
$sentences = [];
|
||||
|
||||
foreach ($possibleSentences as $sentence) {
|
||||
$sentence = trim($sentence);
|
||||
if ($sentence) {
|
||||
$sentences[] = $sentence;
|
||||
}
|
||||
}
|
||||
|
||||
$wordCount = count(preg_split('#\s+#u', $sentences[0] ?? '') ?: []);
|
||||
|
||||
// if the first sentence is too long, show only the first $maxWords words
|
||||
|
@ -282,32 +282,56 @@ class DBTextTest extends SapphireTest
|
||||
/**
* Data sets for the Summary() test.
*
* Each row lists, in order: the source text, the maximum number of words,
* the ellipsis argument (false in most rows, '...' in the custom-ellipsis
* row) and the expected summary string.
*
* NOTE(review): the consuming test method is not fully visible here; the
* row order is inferred from the values themselves, so confirm it against
* that method's signature. false presumably selects the default ellipsis,
* since the expected strings for those rows end in the ellipsis character.
*
* @return array
*/
public function providerSummary()
|
||||
{
|
||||
return [
|
||||
[
|
||||
'simple test' => [
|
||||
'This is some text. It is a test',
|
||||
3,
|
||||
false,
|
||||
'This is some…',
|
||||
],
|
||||
[
|
||||
'custom ellipses' => [
|
||||
// check custom ellipsis
|
||||
'This is a test text in a longer sentence and a custom ellipsis.',
|
||||
8,
|
||||
'...', // regular dots instead of the ellipsis character
|
||||
'This is a test text in a longer...',
|
||||
],
|
||||
[
|
||||
'umlauts' => [
|
||||
'both schön and können have umlauts',
|
||||
5,
|
||||
false,
|
||||
'both schön and können have…',
|
||||
],
|
||||
[
|
||||
'invalid UTF' => [
|
||||
// check invalid UTF8 handling — input is an invalid UTF sequence, output should be empty string
|
||||
"\xf0\x28\x8c\xbc",
|
||||
50,
|
||||
false,
|
||||
'',
|
||||
],
|
||||
'treats period as sentence boundary' => [
|
||||
'This is some text. It is a test. There are three sentences.',
|
||||
10,
|
||||
false,
|
||||
'This is some text. It is a test.',
|
||||
],
|
||||
'treats exclamation mark as sentence boundary' => [
|
||||
'This is some text! It is a test! There are three sentences.',
|
||||
10,
|
||||
false,
|
||||
'This is some text! It is a test!',
|
||||
],
|
||||
'treats question mark as sentence boundary' => [
|
||||
'This is some text? It is a test? There are three sentences.',
|
||||
10,
|
||||
false,
|
||||
'This is some text? It is a test?',
|
||||
],
|
||||
'does not treat colon as sentence boundary' => [
|
||||
'This is some text: It is a test: There are three sentences.',
|
||||
10,
|
||||
false,
|
||||
'This is some text: It is a test: There are…',
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
@ -401,4 +425,15 @@ class DBTextTest extends SapphireTest
|
||||
$result = $text->obj('Summary', [$words, $add])->forTemplate();
|
||||
$this->assertEquals($expectedValue, $result);
|
||||
}
|
||||
|
||||
/**
* Verifies that the sentence boundaries used by Summary() follow the
* `summary_sentence_separators` configuration.
*
* With the default configuration a colon does not end a sentence, so the
* 10-word limit cuts the text mid-sentence and the result ends with the
* ellipsis character. After ':' is merged into the configuration, the same
* call returns the first two colon-terminated segments with no ellipsis.
*/
public function testSummaryConfiguration()
|
||||
{
|
||||
$text = DBField::create_field(DBText::class, 'This is some text: It is a test: There are three sentences.');
|
||||
// Default config: a colon is not a sentence boundary, so the text is truncated at 10 words
|
||||
$this->assertSame('This is some text: It is a test: There are…', $text->Summary(10));
|
||||
|
||||
DBText::config()->merge('summary_sentence_separators', [':']);
|
||||
// With ':' configured as a separator, the summary ends on a colon boundary instead
|
||||
$this->assertSame('This is some text: It is a test:', $text->Summary(10));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user