diff --git a/control/HTTPResponse.php b/control/HTTPResponse.php index e99fda44f..0b6e2b895 100644 --- a/control/HTTPResponse.php +++ b/control/HTTPResponse.php @@ -152,7 +152,7 @@ class SS_HTTPResponse { $this->body = $body; // Set content-length in bytes. Use mbstring to avoid problems with mb_internal_encoding() and mbstring.func_overload - $this->headers['Content-Length'] = (function_exists('mb_strlen') ? mb_strlen($this->body,'8bit') : strlen($this->body)); + $this->headers['Content-Length'] = mb_strlen($this->body,'8bit'); } function getBody() { diff --git a/model/URLSegmentFilter.php b/model/URLSegmentFilter.php index 47c2a6f81..ee625b7c5 100644 --- a/model/URLSegmentFilter.php +++ b/model/URLSegmentFilter.php @@ -64,13 +64,17 @@ class URLSegmentFilter extends Object { $name = mb_strtolower($name); $replacements = $this->getReplacements(); - if($this->getAllowMultibyte()) { - // unset automated removal of non-ASCII characters, and don't try to transliterate - if(isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); - } + + // Unset automated removal of non-ASCII characters, and don't try to transliterate + if($this->getAllowMultibyte() && isset($replacements['/[^A-Za-z0-9+.-]+/u'])) unset($replacements['/[^A-Za-z0-9+.-]+/u']); + foreach($replacements as $regex => $replace) { $name = preg_replace($regex, $replace, $name); } + + // Multibyte URLs require percent encoding to comply with RFC 3986. + // Without this setting, the "remove non-ASCII chars" regex takes care of that. + if($this->getAllowMultibyte()) $name = rawurlencode($name); return $name; } diff --git a/static-main.php b/static-main.php index a3f4ed8ff..339603573 100644 --- a/static-main.php +++ b/static-main.php @@ -83,9 +83,11 @@ if ( $file = isset($homepageMap[$_SERVER['HTTP_HOST']]) ? $homepageMap[$_SERVER['HTTP_HOST']] : $file; } + // Encode each part of the path individually, in order to support multibyte paths. 
+ // SiteTree.URLSegment and hence the static folder and filenames are stored in encoded form, + // to avoid filesystem incompatibilities. + $file = implode('/', array_map('rawurlencode', explode('/', $file))); // Find file by extension (either *.html or *.php) - $file = preg_replace('/[^a-zA-Z0-9\/\-_]/si', '-', $file); - if (file_exists($cacheBaseDir . $cacheDir . $file . '.html')) { header('X-SilverStripe-Cache: hit at '.@date('r')); echo file_get_contents($cacheBaseDir . $cacheDir . $file . '.html'); diff --git a/tests/core/ConvertTest.php b/tests/core/ConvertTest.php index c4f69c741..859b02dc2 100644 --- a/tests/core/ConvertTest.php +++ b/tests/core/ConvertTest.php @@ -116,10 +116,13 @@ class ConvertTest extends SapphireTest { * @todo test toASCII() */ function testRaw2URL() { + $orig = URLSegmentFilter::$default_allow_multibyte; + URLSegmentFilter::$default_allow_multibyte = false; $this->assertEquals('foo', Convert::raw2url('foo')); $this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar')); $this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar!')); $this->assertEquals('foos-bar-2', Convert::raw2url('foo\'s [bar] (2)')); + URLSegmentFilter::$default_allow_multibyte = $orig; } } diff --git a/tests/model/URLSegmentFilterTest.php b/tests/model/URLSegmentFilterTest.php index 3c0a263e8..79574f90a 100644 --- a/tests/model/URLSegmentFilterTest.php +++ b/tests/model/URLSegmentFilterTest.php @@ -27,7 +27,7 @@ class URLSegmentFilterTest extends SapphireTest { $f = new URLSegmentFilter(); $f->setAllowMultibyte(true); $this->assertEquals( - 'brötchen', + urlencode('brötchen'), $f->filter('Brötchen') ); }