diff --git a/model/URLSegmentFilter.php b/model/URLSegmentFilter.php index 47c2a6f81..ee625b7c5 100644 --- a/model/URLSegmentFilter.php +++ b/model/URLSegmentFilter.php @@ -64,13 +64,17 @@ class URLSegmentFilter extends Object { $name = mb_strtolower($name); $replacements = $this->getReplacements(); - if($this->getAllowMultibyte()) { - // unset automated removal of non-ASCII characters, and don't try to transliterate - if(isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); - } + + // Unset automated removal of non-ASCII characters, and don't try to transliterate + if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); + foreach($replacements as $regex => $replace) { $name = preg_replace($regex, $replace, $name); } + + // Multibyte URLs require percent encoding to comply with RFC 3986. + // Without this setting, the "remove non-ASCII chars" regex takes care of that. + if($this->getAllowMultibyte()) $name = rawurlencode($name); return $name; } diff --git a/tests/model/URLSegmentFilterTest.php b/tests/model/URLSegmentFilterTest.php index 3c0a263e8..79574f90a 100644 --- a/tests/model/URLSegmentFilterTest.php +++ b/tests/model/URLSegmentFilterTest.php @@ -27,7 +27,7 @@ class URLSegmentFilterTest extends SapphireTest { $f = new URLSegmentFilter(); $f->setAllowMultibyte(true); $this->assertEquals( - 'brötchen', + urlencode('brötchen'), $f->filter('Brötchen') ); }