diff --git a/model/URLSegmentFilter.php b/model/URLSegmentFilter.php index 03fc062e5..83c5fa363 100644 --- a/model/URLSegmentFilter.php +++ b/model/URLSegmentFilter.php @@ -29,9 +29,9 @@ class URLSegmentFilter extends Object { private static $default_replacements = array( '/&amp;/u' => '-and-', '/&/u' => '-and-', - '/\s/u' => '-', // remove whitespace + '/\s|\+/u' => '-', // replace whitespace and plus signs with dashes '/[_.]+/u' => '-', // underscores and dots to dashes - '/[^A-Za-z0-9+\-]+/u' => '', // remove non-ASCII chars, only allow alphanumeric and dashes + '/[^A-Za-z0-9\-]+/u' => '', // remove non-ASCII chars, only allow alphanumeric and dashes '/[\-]{2,}/u' => '-', // remove duplicate dashes '/^[\-_]/u' => '', // Remove all leading dashes or underscores ); @@ -69,8 +69,8 @@ class URLSegmentFilter extends Object { $replacements = $this->getReplacements(); // Unset automated removal of non-ASCII characters, and don't try to transliterate - if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+\-]+/u'])) { - unset($replacements['/[^A-Za-z0-9+\-]+/u']); + if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9\-]+/u'])) { + unset($replacements['/[^A-Za-z0-9\-]+/u']); } foreach($replacements as $regex => $replace) { diff --git a/tests/model/URLSegmentFilterTest.php b/tests/model/URLSegmentFilterTest.php index c8d029ea5..44e8a69ea 100644 --- a/tests/model/URLSegmentFilterTest.php +++ b/tests/model/URLSegmentFilterTest.php @@ -5,6 +5,8 @@ */ class URLSegmentFilterTest extends SapphireTest { + protected $usesDatabase = false; + public function testReplacesCommonEnglishSymbols() { $f = new URLSegmentFilter(); $f->setAllowMultibyte(false); @@ -14,6 +16,19 @@ class URLSegmentFilterTest extends SapphireTest { ); } + public function testReplacesWhitespace() { + $f = new URLSegmentFilter(); + $f->setAllowMultibyte(false); + $this->assertEquals( + 'john-and-spencer', + $f->filter('John and Spencer') + ); + $this->assertEquals( + 'john-and-spencer', + 
$f->filter('John+and+Spencer') + ); + } + public function testTransliteratesNonAsciiUrls() { $f = new URLSegmentFilter(); $f->setAllowMultibyte(false);