mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Fix OS X issue with Convert::html2raw
, HTMLText::FirstSentence
, HTMLText::Summary
and Text::FirstSentence
.
Use unicode modifier for regular expressions that deal with whitespace. Added unit-tests to ensure no invalud utf-8 gets generated by these methods.
This commit is contained in:
parent
3a7261ac6e
commit
a6db16b229
@ -331,10 +331,10 @@ class Convert {
|
||||
|
||||
// Expand hyperlinks
|
||||
if(!$preserveLinks && !$config['PreserveLinks']) {
|
||||
$data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/i', function($matches) {
|
||||
$data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ui', function($matches) {
|
||||
return Convert::html2raw($matches[2]) . "[$matches[1]]";
|
||||
}, $data);
|
||||
$data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/i', function($matches) {
|
||||
$data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ui', function($matches) {
|
||||
return Convert::html2raw($matches[2]) . "[$matches[1]]";
|
||||
}, $data);
|
||||
}
|
||||
@ -347,13 +347,13 @@ class Convert {
|
||||
|
||||
// Compress whitespace
|
||||
if($config['CompressWhitespace']) {
|
||||
$data = preg_replace("/\s+/", " ", $data);
|
||||
$data = preg_replace("/\s+/u", " ", $data);
|
||||
}
|
||||
|
||||
// Parse newline tags
|
||||
$data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
|
||||
$data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
|
||||
$data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
|
||||
$data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
|
||||
$data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
|
||||
$data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
|
||||
$data = preg_replace("/\n\n\n+/", "\n\n", $data);
|
||||
|
||||
$data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
|
||||
|
@ -140,8 +140,8 @@ class HTMLText extends Text {
|
||||
/* See if we can pull a paragraph out*/
|
||||
|
||||
// Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph
|
||||
$str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}', '', $this->value);
|
||||
if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}', $str, $matches)) $str = $matches[2];
|
||||
$str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}u', '', $this->value);
|
||||
if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}u', $str, $matches)) $str = $matches[2];
|
||||
|
||||
/* If _that_ failed, just use the whole text */
|
||||
if (!$str) $str = $this->value;
|
||||
@ -155,7 +155,7 @@ class HTMLText extends Text {
|
||||
|
||||
/* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for
|
||||
* whitespace normalization) */
|
||||
$words = preg_split('/\s+/', $str);
|
||||
$words = preg_split('/\s+/u', $str);
|
||||
if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);
|
||||
|
||||
/* Otherwise work backwards for a looking for a sentence ending (we try to avoid abbreviations, but aren't
|
||||
@ -183,7 +183,7 @@ class HTMLText extends Text {
|
||||
$paragraph = $this->Summary(-1);
|
||||
|
||||
/* Then look for the first sentence ending. We could probably use a nice regex, but for now this will do */
|
||||
$words = preg_split('/\s+/', $paragraph);
|
||||
$words = preg_split('/\s+/u', $paragraph);
|
||||
foreach ($words as $i => $word) {
|
||||
if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) {
|
||||
return implode(' ', array_slice($words, 0, $i+1));
|
||||
@ -270,7 +270,7 @@ class HTMLText extends Text {
|
||||
|
||||
// If it's just one or two tags on its own (and not the above) it's empty.
|
||||
// This might be <p></p> or <h1></h1> or whatever.
|
||||
if(preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/', $value)) {
|
||||
if(preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/u', $value)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -97,7 +97,7 @@ class Text extends StringField {
|
||||
$paragraph = Convert::xml2raw( $this->RAW() );
|
||||
if( !$paragraph ) return "";
|
||||
|
||||
$words = preg_split('/\s+/', $paragraph);
|
||||
$words = preg_split('/\s+/u', $paragraph);
|
||||
foreach ($words as $i => $word) {
|
||||
if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) {
|
||||
return implode(' ', array_slice($words, 0, $i+1));
|
||||
|
@ -10,6 +10,24 @@ class ConvertTest extends SapphireTest {
|
||||
|
||||
protected $usesDatabase = false;
|
||||
|
||||
private $previousLocaleSetting = null;
|
||||
|
||||
public function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
// clear the previous locale setting
|
||||
$this->previousLocaleSetting = null;
|
||||
}
|
||||
|
||||
public function tearDown()
|
||||
{
|
||||
parent::tearDown();
|
||||
// If a test sets the locale, reset it on teardown
|
||||
if ($this->previousLocaleSetting) {
|
||||
setlocale(LC_CTYPE, $this->previousLocaleSetting);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests {@link Convert::raw2att()}
|
||||
*/
|
||||
@ -396,4 +414,28 @@ XML
|
||||
Convert::base64url_decode(Convert::base64url_encode($data))
|
||||
);
|
||||
}
|
||||
|
||||
public function testValidUtf8()
|
||||
{
|
||||
// Install a UTF-8 locale
|
||||
$this->previousLocaleSetting = setlocale(LC_CTYPE, 0);
|
||||
|
||||
$locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8');
|
||||
$localeInstalled = false;
|
||||
foreach ($locales as $locale) {
|
||||
if ($localeInstalled = setlocale(LC_CTYPE, $locale)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the system doesn't have any of the UTF-8 locales, exit early
|
||||
if ($localeInstalled === false) {
|
||||
$this->markTestIncomplete('Unable to run this test because of missing locale!');
|
||||
return;
|
||||
}
|
||||
|
||||
$problematicText = html_entity_decode('<p>This is a Test with non-breaking space!</p>', ENT_COMPAT, 'UTF-8');
|
||||
|
||||
$this->assertTrue(mb_check_encoding(Convert::html2raw($problematicText), 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,24 @@
|
||||
*/
|
||||
class HTMLTextTest extends SapphireTest {
|
||||
|
||||
private $previousLocaleSetting = null;
|
||||
|
||||
public function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
// clear the previous locale setting
|
||||
$this->previousLocaleSetting = null;
|
||||
}
|
||||
|
||||
public function tearDown()
|
||||
{
|
||||
parent::tearDown();
|
||||
// If a test sets the locale, reset it on teardown
|
||||
if ($this->previousLocaleSetting) {
|
||||
setlocale(LC_CTYPE, $this->previousLocaleSetting);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test {@link HTMLText->LimitCharacters()}
|
||||
*/
|
||||
@ -314,4 +332,31 @@ class HTMLTextTest extends SapphireTest {
|
||||
|
||||
ShortcodeParser::set_active('default');
|
||||
}
|
||||
|
||||
public function testValidUtf8()
|
||||
{
|
||||
// Install a UTF-8 locale
|
||||
$this->previousLocaleSetting = setlocale(LC_CTYPE, 0);
|
||||
$locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8');
|
||||
$localeInstalled = false;
|
||||
foreach ($locales as $locale) {
|
||||
if ($localeInstalled = setlocale(LC_CTYPE, $locale)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the system doesn't have any of the UTF-8 locales, exit early
|
||||
if ($localeInstalled === false) {
|
||||
$this->markTestIncomplete('Unable to run this test because of missing locale!');
|
||||
return;
|
||||
}
|
||||
|
||||
$problematicText = html_entity_decode('<p>This is a Test with non-breaking space!</p>', ENT_COMPAT, 'UTF-8');
|
||||
|
||||
$textObj = new HTMLText('Test');
|
||||
$textObj->setValue($problematicText);
|
||||
|
||||
$this->assertTrue(mb_check_encoding($textObj->FirstSentence(), 'UTF-8'));
|
||||
$this->assertTrue(mb_check_encoding($textObj->Summary(), 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,24 @@
|
||||
*/
|
||||
class TextTest extends SapphireTest {
|
||||
|
||||
private $previousLocaleSetting = null;
|
||||
|
||||
public function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
// clear the previous locale setting
|
||||
$this->previousLocaleSetting = null;
|
||||
}
|
||||
|
||||
public function tearDown()
|
||||
{
|
||||
parent::tearDown();
|
||||
// If a test sets the locale, reset it on teardown
|
||||
if ($this->previousLocaleSetting) {
|
||||
setlocale(LC_CTYPE, $this->previousLocaleSetting);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test {@link Text->LimitCharacters()}
|
||||
*/
|
||||
@ -240,4 +258,30 @@ class TextTest extends SapphireTest {
|
||||
$data = DBField::create_field('Text', '"this is a test"');
|
||||
$this->assertEquals($data->ATT(), '"this is a test"');
|
||||
}
|
||||
|
||||
public function testValidUtf8()
|
||||
{
|
||||
// Install a UTF-8 locale
|
||||
$this->previousLocaleSetting = setlocale(LC_CTYPE, 0);
|
||||
$locales = array('en_US.UTF-8', 'en_NZ.UTF-8', 'de_DE.UTF-8');
|
||||
$localeInstalled = false;
|
||||
foreach ($locales as $locale) {
|
||||
if ($localeInstalled = setlocale(LC_CTYPE, $locale)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the system doesn't have any of the UTF-8 locales, exit early
|
||||
if ($localeInstalled === false) {
|
||||
$this->markTestIncomplete('Unable to run this test because of missing locale!');
|
||||
return;
|
||||
}
|
||||
|
||||
$problematicText = html_entity_decode('This is a Test with non-breaking space!', ENT_COMPAT, 'UTF-8');
|
||||
|
||||
$textObj = new Text('Test');
|
||||
$textObj->setValue($problematicText);
|
||||
|
||||
$this->assertTrue(mb_check_encoding($textObj->FirstSentence(), 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user