diff --git a/model/URLSegmentFilter.php b/model/URLSegmentFilter.php index 983d6a0cb..870622677 100644 --- a/model/URLSegmentFilter.php +++ b/model/URLSegmentFilter.php @@ -29,7 +29,7 @@ class URLSegmentFilter extends Object { '/&/u' => '-and-', '/\s/u' => '-', // remove whitespace '/_/u' => '-', // underscores to dashes - '/[^A-Za-z0-9+.-]+/u' => '', // remove non-ASCII chars, only allow alphanumeric plus dash and dot + '/[^A-Za-z0-9.-]+/u' => '', // remove non-ASCII chars, only allow alphanumeric, dashes and dots. '/[\-]{2,}/u' => '-', // remove duplicate dashes '/^[\.\-_]/u' => '', // Remove all leading dots, dashes or underscores ); @@ -66,8 +66,8 @@ class URLSegmentFilter extends Object { $replacements = $this->getReplacements(); // Unset automated removal of non-ASCII characters, and don't try to transliterate - if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+.-]+/u'])) { - unset($replacements['/[^A-Za-z0-9+.-]+/u']); + if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9.-]+/u'])) { + unset($replacements['/[^A-Za-z0-9.-]+/u']); } foreach($replacements as $regex => $replace) { diff --git a/tests/model/URLSegmentFilterTest.php b/tests/model/URLSegmentFilterTest.php index 38adedef2..c5567be31 100644 --- a/tests/model/URLSegmentFilterTest.php +++ b/tests/model/URLSegmentFilterTest.php @@ -22,7 +22,15 @@ class URLSegmentFilterTest extends SapphireTest { $f->filter('Brötchen') ); } - + + public function testReplacesCommonNonAsciiCharacters() { + $f = new URLSegmentFilter(); + $this->assertEquals( + urlencode('aa1-.'), + $f->filter('Aa1~!@#$%^*()_+`-=;\':"[]\{}|,./<>?') + ); + } + public function testRetainsNonAsciiUrlsWithAllowMultiByteOption() { $f = new URLSegmentFilter(); $f->setAllowMultibyte(true);