mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #118 from silverstripe/multibyte-urlsegment
URLSegmentFilter API (sapphire/master)
This commit is contained in:
commit
a8f57be2f2
@ -349,21 +349,13 @@ class Convert {
|
||||
|
||||
/**
|
||||
* Convert a string (normally a title) to a string suitable for using in
|
||||
* urls and other html attributes
|
||||
* urls and other html attributes. Uses {@link URLSegmentFilter}.
|
||||
*
|
||||
* @param string
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function raw2url($title) {
|
||||
$t = (function_exists('mb_strtolower')) ? mb_strtolower($title) : strtolower($title);
|
||||
$t = Object::create('Transliterator')->toASCII($t);
|
||||
$t = str_replace('&','-and-',$t);
|
||||
$t = str_replace('&','-and-',$t);
|
||||
$t = ereg_replace('[^A-Za-z0-9]+','-',$t);
|
||||
$t = ereg_replace('-+','-',$t);
|
||||
$t = trim($t, '-');
|
||||
|
||||
return $t;
|
||||
$f = Object::create('URLSegmentFilter');
|
||||
return $f->filter($title);
|
||||
}
|
||||
}
|
@ -75,6 +75,20 @@ This means all formats are defined in
|
||||
[ISO date format](http://framework.zend.com/manual/en/zend.date.constants.html#zend.date.constants.selfdefinedformats),
|
||||
not PHP's built-in [date()](http://nz.php.net/manual/en/function.date.php).
|
||||
|
||||
### i18n in URLs
|
||||
|
||||
By default, URLs for pages in SilverStripe (the `SiteTree->URLSegment` property)
|
||||
are automatically reduced to the allowed subset of ASCII characters.
|
||||
If characters outside this subset are added, they are either removed or (if possible) "transliterated".
|
||||
This describes the process of converting from one character set to another
|
||||
while keeping characters recognizable. For example, vowels with French accents
|
||||
are replaced with their base characters, `pâté` becomes `pate`.
|
||||
|
||||
In order to allow for so called "multibyte" characters outside of the ASCII subset,
|
||||
enable multibyte support in the underlying class: `URLSegmentFilter::$default_allow_multibyte = true`
|
||||
|
||||
Please refer to [W3C: Introduction to IDN and IRI](http://www.w3.org/International/articles/idn-and-iri/) for more details.
|
||||
|
||||
### i18n in Form Fields
|
||||
|
||||
Date- and time-related form fields support i18n ([api:DateField], [api:TimeField], [api:DatetimeField]).
|
||||
|
@ -23,6 +23,8 @@
|
||||
* FileNameFilter::$default_use_transliterator = false;
|
||||
* FileNameFilter::$default_replacements = array();
|
||||
* </code>
|
||||
*
|
||||
* See {@link URLSegmentFilter} for a more generic implementation.
|
||||
*/
|
||||
class FileNameFilter {
|
||||
|
||||
|
137
model/URLSegmentFilter.php
Normal file
137
model/URLSegmentFilter.php
Normal file
@ -0,0 +1,137 @@
|
||||
<?php
|
||||
/**
|
||||
* @package sapphire
|
||||
* @subpackage model
|
||||
*/
|
||||
|
||||
/**
|
||||
* Filter certain characters from "URL segments" (also called "slugs"), for nicer (more SEO-friendly) URLs.
|
||||
* Uses {@link Transliterator} to convert non-ASCII characters to meaningful ASCII representations.
|
||||
* Use {@link $default_allow_multibyte} to allow a broader range of characters without transliteration.
|
||||
*
|
||||
* Caution: Should not be used on full URIs with domains or query parameters.
|
||||
* In order to retain forward slashes in a path, each segment needs to be filtered individually.
|
||||
*
|
||||
* See {@link FileNameFilter} for a similar implementation for filesystem-based URLs.
|
||||
*/
|
||||
class URLSegmentFilter {

	/**
	 * Necessary to support {@link Object::create()}
	 */
	public function __construct() {}

	/**
	 * Whether {@link getTransliterator()} lazily creates a {@link Transliterator}
	 * when none has been set on the instance.
	 *
	 * @var Boolean
	 */
	public static $default_use_transliterator = true;

	/**
	 * Map of regular expression to replacement string, applied in order by {@link filter()}.
	 * Used whenever no instance-specific rules were set through {@link setReplacements()}.
	 *
	 * @var Array See {@link setReplacements()}.
	 */
	public static $default_replacements = array(
		'/&amp;/u' => '-and-', // HTML-encoded ampersand, must run before the plain ampersand rule
		'/&/u' => '-and-',
		'/\s/u' => '-', // whitespace to dashes
		'/_/u' => '-', // underscores to dashes
		// Remove everything except alphanumerics, plus sign, dot and dash.
		// filter() unsets this rule by its exact key when multibyte characters are allowed.
		'/[^A-Za-z0-9+.-]+/u' => '',
		'/[\-]{2,}/u' => '-', // remove duplicate dashes
		'/^[\.\-_]+/u' => '', // Remove all leading dots, dashes or underscores
	);

	/**
	 * Doesn't try to replace or transliterate non-ASCII characters.
	 * Useful for character sets that have little overlap with ASCII (e.g. far eastern),
	 * as well as better search engine optimization for URLs.
	 * @see http://www.ietf.org/rfc/rfc3987
	 *
	 * @var boolean
	 */
	public static $default_allow_multibyte = false;

	/**
	 * Instance-specific rules; an empty array means "use {@link $default_replacements}".
	 *
	 * @var Array See {@link setReplacements()}
	 */
	public $replacements = array();

	/**
	 * @var Transliterator
	 */
	protected $transliterator;

	/**
	 * NULL means "fall back to {@link $default_allow_multibyte}".
	 *
	 * @var boolean
	 */
	protected $allowMultibyte;

	/**
	 * Transliterates (unless multibyte characters are allowed), lowercases,
	 * and then applies every replacement rule in order.
	 *
	 * Note: Depending on the applied replacement rules, this method might result in an empty string.
	 *
	 * @param String URL path (without domain or query parameters), in utf8 encoding
	 * @return String A filtered path compatible with RFC 3986
	 */
	public function filter($name) {
		$allowMultibyte = $this->getAllowMultibyte();

		if(!$allowMultibyte) {
			// Only transliterate when no multibyte support is requested
			$transliterator = $this->getTransliterator();
			if($transliterator) $name = $transliterator->toASCII($name);
		}

		// The input is documented as UTF-8, so don't rely on the configured internal encoding.
		$name = (function_exists('mb_strtolower')) ? mb_strtolower($name, 'UTF-8') : strtolower($name);

		$replacements = $this->getReplacements();
		if($allowMultibyte) {
			// unset automated removal of non-ASCII characters, and don't try to transliterate
			unset($replacements['/[^A-Za-z0-9+.-]+/u']);
		}

		foreach($replacements as $regex => $replace) {
			$name = preg_replace($regex, $replace, $name);
		}

		return $name;
	}

	/**
	 * @param Array Map of find/replace used for preg_replace().
	 */
	public function setReplacements($r) {
		$this->replacements = $r;
	}

	/**
	 * Returns the instance rules if any were set, otherwise {@link $default_replacements}.
	 *
	 * @return Array
	 */
	public function getReplacements() {
		return ($this->replacements) ? $this->replacements : self::$default_replacements;
	}

	/**
	 * Lazily creates a {@link Transliterator} when none was set and
	 * {@link $default_use_transliterator} is enabled. A value set explicitly
	 * through {@link setTransliterator()} (including FALSE) is returned as-is.
	 *
	 * @return Transliterator|NULL
	 */
	public function getTransliterator() {
		if($this->transliterator === null && self::$default_use_transliterator) {
			$this->transliterator = Object::create('Transliterator');
		}
		return $this->transliterator;
	}

	/**
	 * @param Transliterator|FALSE Pass FALSE to disable transliteration for this instance.
	 */
	public function setTransliterator($t) {
		$this->transliterator = $t;
	}

	/**
	 * @param boolean
	 */
	public function setAllowMultibyte($bool) {
		$this->allowMultibyte = $bool;
	}

	/**
	 * Returns the instance setting, or {@link $default_allow_multibyte} if none was set.
	 *
	 * @return boolean
	 */
	public function getAllowMultibyte() {
		return ($this->allowMultibyte !== null) ? $this->allowMultibyte : self::$default_allow_multibyte;
	}
}
|
@ -104,7 +104,7 @@ class ConvertTest extends SapphireTest {
|
||||
$this->assertEquals('foo', Convert::raw2url('foo'));
|
||||
$this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar'));
|
||||
$this->assertEquals('foo-and-bar', Convert::raw2url('foo & bar!'));
|
||||
$this->assertEquals('foo-s-bar-2', Convert::raw2url('foo\'s [bar] (2)'));
|
||||
$this->assertEquals('foos-bar-2', Convert::raw2url('foo\'s [bar] (2)'));
|
||||
}
|
||||
|
||||
}
|
35
tests/model/URLSegmentFilterTest.php
Normal file
35
tests/model/URLSegmentFilterTest.php
Normal file
@ -0,0 +1,35 @@
|
||||
<?php
|
||||
/**
|
||||
* @package sapphire
|
||||
* @subpackage tests
|
||||
*/
|
||||
class URLSegmentFilterTest extends SapphireTest {

	/**
	 * An ampersand between words is spelled out as "-and-" and the result is lowercased.
	 */
	function testReplacesCommonEnglishSymbols() {
		$filter = new URLSegmentFilter();
		$filter->setAllowMultibyte(false);
		$actual = $filter->filter('John & Spencer');

		$this->assertEquals('john-and-spencer', $actual);
	}

	/**
	 * With multibyte support switched off, an umlaut is transliterated to ASCII.
	 */
	function testTransliteratesNonAsciiUrls() {
		$filter = new URLSegmentFilter();
		$filter->setAllowMultibyte(false);
		$actual = $filter->filter('Brötchen');

		$this->assertEquals('broetchen', $actual);
	}

	/**
	 * With multibyte support switched on, the umlaut is kept and only the case changes.
	 */
	function testRetainsNonAsciiUrlsWithAllowMultiByteOption() {
		$filter = new URLSegmentFilter();
		$filter->setAllowMultibyte(true);
		$actual = $filter->filter('Brötchen');

		$this->assertEquals('brötchen', $actual);
	}

}
|
Loading…
Reference in New Issue
Block a user