mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #11100 from creative-commoners/pulls/5/split-summary-by-punctuation
ENH Split sentences by configurable punctuation for summary
This commit is contained in:
commit
b1a1d4b951
@ -37,6 +37,11 @@ class DBText extends DBString
|
|||||||
'Summary' => 'Text',
|
'Summary' => 'Text',
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Punctuation that marks an end of a sentence for the Summary() method
|
||||||
|
*/
|
||||||
|
private static array $summary_sentence_separators = ['.', '?', '!'];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* (non-PHPdoc)
|
* (non-PHPdoc)
|
||||||
* @see DBField::requireField()
|
* @see DBField::requireField()
|
||||||
@ -130,10 +135,18 @@ class DBText extends DBString
|
|||||||
$add = $this->defaultEllipsis();
|
$add = $this->defaultEllipsis();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split on sentences (don't remove period)
|
// Split on sentences (don't remove punctuation)
|
||||||
$sentences = array_filter(array_map(function ($str) {
|
$summarySentenceSeparators = preg_quote(implode(static::config()->get('summary_sentence_separators')), '@');
|
||||||
return trim($str ?? '');
|
$possibleSentences = preg_split('@(?<=[' . $summarySentenceSeparators . '])@', $value ?? '') ?: [];
|
||||||
}, preg_split('@(?<=\.)@', $value ?? '') ?: []));
|
$sentences = [];
|
||||||
|
|
||||||
|
foreach ($possibleSentences as $sentence) {
|
||||||
|
$sentence = trim($sentence);
|
||||||
|
if ($sentence) {
|
||||||
|
$sentences[] = $sentence;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$wordCount = count(preg_split('#\s+#u', $sentences[0] ?? '') ?: []);
|
$wordCount = count(preg_split('#\s+#u', $sentences[0] ?? '') ?: []);
|
||||||
|
|
||||||
// if the first sentence is too long, show only the first $maxWords words
|
// if the first sentence is too long, show only the first $maxWords words
|
||||||
|
@ -282,32 +282,56 @@ class DBTextTest extends SapphireTest
|
|||||||
public function providerSummary()
|
public function providerSummary()
|
||||||
{
|
{
|
||||||
return [
|
return [
|
||||||
[
|
'simple test' => [
|
||||||
'This is some text. It is a test',
|
'This is some text. It is a test',
|
||||||
3,
|
3,
|
||||||
false,
|
false,
|
||||||
'This is some…',
|
'This is some…',
|
||||||
],
|
],
|
||||||
[
|
'custom ellipses' => [
|
||||||
// check custom ellipsis
|
// check custom ellipsis
|
||||||
'This is a test text in a longer sentence and a custom ellipsis.',
|
'This is a test text in a longer sentence and a custom ellipsis.',
|
||||||
8,
|
8,
|
||||||
'...', // regular dots instead of the ellipsis character
|
'...', // regular dots instead of the ellipsis character
|
||||||
'This is a test text in a longer...',
|
'This is a test text in a longer...',
|
||||||
],
|
],
|
||||||
[
|
'umlauts' => [
|
||||||
'both schön and können have umlauts',
|
'both schön and können have umlauts',
|
||||||
5,
|
5,
|
||||||
false,
|
false,
|
||||||
'both schön and können have…',
|
'both schön and können have…',
|
||||||
],
|
],
|
||||||
[
|
'invalid UTF' => [
|
||||||
// check invalid UTF8 handling — input is an invalid UTF sequence, output should be empty string
|
// check invalid UTF8 handling — input is an invalid UTF sequence, output should be empty string
|
||||||
"\xf0\x28\x8c\xbc",
|
"\xf0\x28\x8c\xbc",
|
||||||
50,
|
50,
|
||||||
false,
|
false,
|
||||||
'',
|
'',
|
||||||
],
|
],
|
||||||
|
'treats period as sentence boundary' => [
|
||||||
|
'This is some text. It is a test. There are three sentences.',
|
||||||
|
10,
|
||||||
|
false,
|
||||||
|
'This is some text. It is a test.',
|
||||||
|
],
|
||||||
|
'treats exclamation mark as sentence boundary' => [
|
||||||
|
'This is some text! It is a test! There are three sentences.',
|
||||||
|
10,
|
||||||
|
false,
|
||||||
|
'This is some text! It is a test!',
|
||||||
|
],
|
||||||
|
'treats question mark as sentence boundary' => [
|
||||||
|
'This is some text? It is a test? There are three sentences.',
|
||||||
|
10,
|
||||||
|
false,
|
||||||
|
'This is some text? It is a test?',
|
||||||
|
],
|
||||||
|
'does not treat colon as sentence boundary' => [
|
||||||
|
'This is some text: It is a test: There are three sentences.',
|
||||||
|
10,
|
||||||
|
false,
|
||||||
|
'This is some text: It is a test: There are…',
|
||||||
|
],
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -401,4 +425,15 @@ class DBTextTest extends SapphireTest
|
|||||||
$result = $text->obj('Summary', [$words, $add])->forTemplate();
|
$result = $text->obj('Summary', [$words, $add])->forTemplate();
|
||||||
$this->assertEquals($expectedValue, $result);
|
$this->assertEquals($expectedValue, $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testSummaryConfiguration()
|
||||||
|
{
|
||||||
|
$text = DBField::create_field(DBText::class, 'This is some text: It is a test: There are three sentences.');
|
||||||
|
// Doesn't treat colon as a boundary by default
|
||||||
|
$this->assertSame('This is some text: It is a test: There are…', $text->Summary(10));
|
||||||
|
|
||||||
|
DBText::config()->merge('summary_sentence_separators', [':']);
|
||||||
|
// Does treat colon as a boundary if configured to do so
|
||||||
|
$this->assertSame('This is some text: It is a test:', $text->Summary(10));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user