From bf629dfabd0587447384bda4b997760478b302a0 Mon Sep 17 00:00:00 2001
From: Guy Sartorelli
Date: Sat, 4 Dec 2021 18:43:36 +0100
Subject: [PATCH] ENH Split sentences by configurable punctuation for summary

Co-authored-by: Lukas Erni
---
 src/ORM/FieldType/DBText.php | 21 ++++++++++++++----
 tests/php/ORM/DBTextTest.php | 43 ++++++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/src/ORM/FieldType/DBText.php b/src/ORM/FieldType/DBText.php
index ce60aa295..f00252585 100644
--- a/src/ORM/FieldType/DBText.php
+++ b/src/ORM/FieldType/DBText.php
@@ -37,6 +37,11 @@ class DBText extends DBString
         'Summary' => 'Text',
     ];
 
+    /**
+     * Characters that end a sentence for the Summary() method; they are joined into one regex character class
+     */
+    private static array $summary_sentence_separators = ['.', '?', '!'];
+
     /**
      * (non-PHPdoc)
      * @see DBField::requireField()
@@ -130,10 +135,18 @@ class DBText extends DBString
             $add = $this->defaultEllipsis();
         }
 
-        // Split on sentences (don't remove period)
-        $sentences = array_filter(array_map(function ($str) {
-            return trim($str ?? '');
-        }, preg_split('@(?<=\.)@', $value ?? '') ?: []));
+        // Split after each separator (lookbehind keeps the punctuation; /u keeps multibyte separators whole)
+        $summarySentenceSeparators = preg_quote(implode(static::config()->get('summary_sentence_separators')), '@');
+        $possibleSentences = preg_split('@(?<=[' . $summarySentenceSeparators . '])@u', $value ?? '') ?: [];
+        $sentences = [];
+        // Keep every non-empty fragment; a plain truthiness check would wrongly discard the string "0"
+        foreach ($possibleSentences as $sentence) {
+            $sentence = trim($sentence);
+            if ($sentence !== '') {
+                $sentences[] = $sentence;
+            }
+        }
+
         $wordCount = count(preg_split('#\s+#u', $sentences[0] ?? '') ?: []);
 
         // if the first sentence is too long, show only the first $maxWords words
diff --git a/tests/php/ORM/DBTextTest.php b/tests/php/ORM/DBTextTest.php
index 7a2a1888e..cb2f0fa48 100644
--- a/tests/php/ORM/DBTextTest.php
+++ b/tests/php/ORM/DBTextTest.php
@@ -282,32 +282,56 @@ class DBTextTest extends SapphireTest
     public function providerSummary()
     {
         return [
-            [
+            'simple test' => [
                 'This is some text. It is a test',
                 3,
                 false,
                 'This is some…',
             ],
-            [
+            'custom ellipses' => [
                 // check custom ellipsis
                 'This is a test text in a longer sentence and a custom ellipsis.',
                 8,
                 '...', // regular dots instead of the ellipsis character
                 'This is a test text in a longer...',
             ],
-            [
+            'umlauts' => [
                 'both schön and können have umlauts',
                 5,
                 false,
                 'both schön and können have…',
             ],
-            [
+            'invalid UTF' => [
                 // check invalid UTF8 handling — input is an invalid UTF sequence, output should be empty string
                 "\xf0\x28\x8c\xbc",
                 50,
                 false,
                 '',
             ],
+            'treats period as sentence boundary' => [
+                'This is some text. It is a test. There are three sentences.',
+                10,
+                false,
+                'This is some text. It is a test.',
+            ],
+            'treats exclamation mark as sentence boundary' => [
+                'This is some text! It is a test! There are three sentences.',
+                10,
+                false,
+                'This is some text! It is a test!',
+            ],
+            'treats question mark as sentence boundary' => [
+                'This is some text? It is a test? There are three sentences.',
+                10,
+                false,
+                'This is some text? It is a test?',
+            ],
+            'does not treat colon as sentence boundary' => [
+                'This is some text: It is a test: There are three sentences.',
+                10,
+                false,
+                'This is some text: It is a test: There are…',
+            ],
         ];
     }
 
@@ -401,4 +425,15 @@ class DBTextTest extends SapphireTest
         $result = $text->obj('Summary', [$words, $add])->forTemplate();
         $this->assertEquals($expectedValue, $result);
     }
+
+    public function testSummaryConfiguration()
+    {
+        $text = DBField::create_field(DBText::class, 'This is some text: It is a test: There are three sentences.');
+        // Doesn't treat colon as a boundary by default
+        $this->assertSame('This is some text: It is a test: There are…', $text->Summary(10));
+
+        DBText::config()->merge('summary_sentence_separators', [':']);
+        // Does treat colon as a boundary if configured to do so
+        $this->assertSame('This is some text: It is a test:', $text->Summary(10));
+    }
 }