From 8281678d4c57a97c2116fbbc130d1d004eccb469 Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Mon, 6 Feb 2012 11:56:26 +0100 Subject: [PATCH] BUGFIX Urlencode paths in URLSegmentFilter when $allowMultibyte=true to avoid creating invalid URLs (and breaking assumptions based on ASCII-only URLs, such as static publishing filename creation) --- model/URLSegmentFilter.php | 12 ++++++++---- tests/model/URLSegmentFilterTest.php | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/model/URLSegmentFilter.php b/model/URLSegmentFilter.php index 47c2a6f81..ee625b7c5 100644 --- a/model/URLSegmentFilter.php +++ b/model/URLSegmentFilter.php @@ -64,13 +64,17 @@ class URLSegmentFilter extends Object { $name = mb_strtolower($name); $replacements = $this->getReplacements(); - if($this->getAllowMultibyte()) { - // unset automated removal of non-ASCII characters, and don't try to transliterate - if(isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); - } + + // Unset automated removal of non-ASCII characters, and don't try to transliterate + if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); + foreach($replacements as $regex => $replace) { $name = preg_replace($regex, $replace, $name); } + + // Multibyte URLs require percent encoding to comply with RFC 3986. + // Without this setting, the "remove non-ASCII chars" regex takes care of that. + if($this->getAllowMultibyte()) $name = rawurlencode($name); return $name; } diff --git a/tests/model/URLSegmentFilterTest.php b/tests/model/URLSegmentFilterTest.php index 3c0a263e8..79574f90a 100644 --- a/tests/model/URLSegmentFilterTest.php +++ b/tests/model/URLSegmentFilterTest.php @@ -27,7 +27,7 @@ class URLSegmentFilterTest extends SapphireTest { $f = new URLSegmentFilter(); $f->setAllowMultibyte(true); $this->assertEquals( - 'brötchen', + urlencode('brötchen'), $f->filter('Brötchen') ); }