diff --git a/src/Core/Convert.php b/src/Core/Convert.php index 23afd6e47..7db3ad03d 100644 --- a/src/Core/Convert.php +++ b/src/Core/Convert.php @@ -385,10 +385,10 @@ class Convert // Expand hyperlinks if (!$preserveLinks && !$config['PreserveLinks']) { - $data = preg_replace_callback('/]*href\s*=\s*"([^"]*)">(.*?)<\/a>/i', function ($matches) { + $data = preg_replace_callback('/]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ui', function ($matches) { return Convert::html2raw($matches[2]) . "[$matches[1]]"; }, $data); - $data = preg_replace_callback('/]*href\s*=\s*([^ ]*)>(.*?)<\/a>/i', function ($matches) { + $data = preg_replace_callback('/]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ui', function ($matches) { return Convert::html2raw($matches[2]) . "[$matches[1]]"; }, $data); } @@ -401,13 +401,13 @@ class Convert // Compress whitespace if ($config['CompressWhitespace']) { - $data = preg_replace("/\s+/", " ", $data); + $data = preg_replace("/\s+/u", " ", $data); } // Parse newline tags - $data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data); - $data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data); - $data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data); + $data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data); + $data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data); + $data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data); $data = preg_replace("/\n\n\n+/", "\n\n", $data); $data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data); diff --git a/src/ORM/FieldType/DBText.php b/src/ORM/FieldType/DBText.php index 84967951b..4e9448650 100644 --- a/src/ORM/FieldType/DBText.php +++ b/src/ORM/FieldType/DBText.php @@ -80,7 +80,7 @@ class DBText extends DBString } // Do a word-search - $words = preg_split('/\s+/', $value); + $words = preg_split('/\s+/u', $value); $sentences = 0; foreach ($words as $i => $word) { if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) { @@ -130,7 +130,7 @@ class DBText extends DBString $sentences = array_filter(array_map(function ($str) { return trim($str); }, preg_split('@(?<=\.)@', $value))); - $wordCount = count(preg_split('#\s+#', $sentences[0])); + $wordCount = count(preg_split('#\s+#u', $sentences[0])); // if the first sentence is too long, show only the first $maxWords words if ($wordCount > $maxWords) { @@ -145,7 +145,7 @@ class DBText extends DBString // If more sentences to process, count number of words if ($sentences) { - $wordCount += count(preg_split('#\s+#', $sentences[0])); + $wordCount += count(preg_split('#\s+#u', $sentences[0])); } } while ($wordCount < $maxWords && $sentences && trim($sentences[0])); diff --git a/tests/php/Core/ConvertTest.php b/tests/php/Core/ConvertTest.php index 77800f6e6..f6ef743af 100644 --- a/tests/php/Core/ConvertTest.php +++ b/tests/php/Core/ConvertTest.php @@ -17,6 +17,24 @@ class ConvertTest extends SapphireTest protected $usesDatabase = false; + private $previousLocaleSetting = null; + + public function setUp() + { + parent::setUp(); + // clear the previous locale setting + $this->previousLocaleSetting = null; + } + + public function tearDown() + { + parent::tearDown(); + // If a test sets the locale, reset it on teardown + if ($this->previousLocaleSetting) { + setlocale(LC_CTYPE, $this->previousLocaleSetting); + } + } + /** * Tests {@link Convert::raw2att()} */ @@ -479,6 +497,30 @@ XML ); } + public function testValidUtf8() + { + // Install a UTF-8 locale + $this->previousLocaleSetting = setlocale(LC_CTYPE, 0); + + $locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8'); + $localeInstalled = false; + foreach ($locales as $locale) { + if ($localeInstalled = setlocale(LC_CTYPE, $locale)) { + break; + } + } + + // If the system doesn't have any of the UTF-8 locales, exit early + if ($localeInstalled === false) { + $this->markTestIncomplete('Unable to run this test because of missing locale!'); + return; + } + + $problematicText = html_entity_decode('

This is a Test with non-breaking space!

', ENT_COMPAT, 'UTF-8'); + + $this->assertTrue(mb_check_encoding(Convert::html2raw($problematicText), 'UTF-8')); + } + public function testUpperCamelToLowerCamel() { $this->assertEquals( diff --git a/tests/php/ORM/DBHTMLTextTest.php b/tests/php/ORM/DBHTMLTextTest.php index b7ab4e9a7..46f0e0b0f 100644 --- a/tests/php/ORM/DBHTMLTextTest.php +++ b/tests/php/ORM/DBHTMLTextTest.php @@ -15,10 +15,15 @@ use SilverStripe\View\Parsers\ShortcodeParser; class DBHTMLTextTest extends SapphireTest { + private $previousLocaleSetting = null; + protected function setUp() { parent::setUp(); + // clear the previous locale setting + $this->previousLocaleSetting = null; + // Set test handler ShortcodeParser::get('htmltest') ->register('test_shortcode', array(TestShortcode::class, 'handle_shortcode')); @@ -27,6 +32,12 @@ class DBHTMLTextTest extends SapphireTest protected function tearDown() { + + // If a test sets the locale, reset it on teardown + if ($this->previousLocaleSetting) { + setlocale(LC_CTYPE, $this->previousLocaleSetting); + } + ShortcodeParser::set_active('default'); parent::tearDown(); } @@ -581,4 +592,31 @@ class DBHTMLTextTest extends SapphireTest ShortcodeParser::set_active('default'); } + + public function testValidUtf8() + { + // Install a UTF-8 locale + $this->previousLocaleSetting = setlocale(LC_CTYPE, 0); + $locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8'); + $localeInstalled = false; + foreach ($locales as $locale) { + if ($localeInstalled = setlocale(LC_CTYPE, $locale)) { + break; + } + } + + // If the system doesn't have any of the UTF-8 locales, exit early + if ($localeInstalled === false) { + $this->markTestIncomplete('Unable to run this test because of missing locale!'); + return; + } + + $problematicText = html_entity_decode('

This is a Test with non-breaking space!

', ENT_COMPAT, 'UTF-8'); + + $textObj = new DBHTMLText('Test'); + $textObj->setValue($problematicText); + + $this->assertTrue(mb_check_encoding($textObj->FirstSentence(), 'UTF-8')); + $this->assertTrue(mb_check_encoding($textObj->Summary(), 'UTF-8')); + } } diff --git a/tests/php/ORM/DBTextTest.php b/tests/php/ORM/DBTextTest.php index ccb458c1a..d3fbc3d4a 100644 --- a/tests/php/ORM/DBTextTest.php +++ b/tests/php/ORM/DBTextTest.php @@ -4,6 +4,7 @@ namespace SilverStripe\ORM\Tests; use SilverStripe\ORM\FieldType\DBField; use SilverStripe\Dev\SapphireTest; +use SilverStripe\ORM\FieldType\DBText; /** * Tests parsing and summary methods on DBText @@ -11,6 +12,24 @@ use SilverStripe\Dev\SapphireTest; class DBTextTest extends SapphireTest { + private $previousLocaleSetting = null; + + public function setUp() + { + parent::setUp(); + // clear the previous locale setting + $this->previousLocaleSetting = null; + } + + public function tearDown() + { + parent::tearDown(); + // If a test sets the locale, reset it on teardown + if ($this->previousLocaleSetting) { + setlocale(LC_CTYPE, $this->previousLocaleSetting); + } + } + /** * Test {@link Text->LimitCharacters()} */ @@ -272,4 +291,30 @@ class DBTextTest extends SapphireTest $data = DBField::create_field('Text', '"this is a test"'); $this->assertEquals($data->ATT(), '"this is a test"'); } + + public function testValidUtf8() + { + // Install a UTF-8 locale + $this->previousLocaleSetting = setlocale(LC_CTYPE, 0); + $locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8'); + $localeInstalled = false; + foreach ($locales as $locale) { + if ($localeInstalled = setlocale(LC_CTYPE, $locale)) { + break; + } + } + + // If the system doesn't have any of the UTF-8 locales, exit early + if ($localeInstalled === false) { + $this->markTestIncomplete('Unable to run this test because of missing locale!'); + return; + } + + $problematicText = html_entity_decode('This is a Test with non-breaking space!', ENT_COMPAT, 'UTF-8'); + + $textObj = new DBText('Test'); + $textObj->setValue($problematicText); + + $this->assertTrue(mb_check_encoding($textObj->FirstSentence(), 'UTF-8')); + } }