2011-11-14 12:26:51 +01:00
< ? php
2016-08-19 00:51:35 +02:00
namespace SilverStripe\View\Parsers ;
use SilverStripe\Core\Object ;
2011-11-14 12:26:51 +01:00
/**
 * Filter certain characters from "URL segments" (also called "slugs"), for nicer (more SEO-friendly) URLs.
 * Uses {@link Transliterator} to convert non-ASCII characters to meaningful ASCII representations.
 * Use {@link $default_allow_multibyte} to allow a broader range of characters without transliteration.
 *
 * Caution: Should not be used on full URIs with domains or query parameters.
 * In order to retain forward slashes in a path, each individual segment needs to be filtered separately.
 *
 * See {@link FileNameFilter} for a similar implementation for filesystem-based URLs.
 */
2016-11-29 00:31:16 +01:00
class URLSegmentFilter extends Object
{
    /**
     * Whether getTransliterator() lazily creates a {@link Transliterator}
     * when none has been set explicitly.
     *
     * @config
     * @var boolean
     */
    private static $default_use_transliterator = true;

    /**
     * Default find/replace map used by filter() when no instance-specific
     * replacements are set. Rules are applied in declaration order.
     *
     * @config
     * @var array See {@link setReplacements()}.
     */
    private static $default_replacements = array(
        // HTML-encoded ampersands; must precede the bare "&" rule, which would
        // otherwise consume the "&" and leave "amp;" behind.
        '/&amp;/u' => '-and-',
        '/&/u' => '-and-',
        '/\s|\+/u' => '-', // remove whitespace/plus
        '/[_.]+/u' => '-', // underscores and dots to dashes
        '/[^A-Za-z0-9\-]+/u' => '', // remove non-ASCII chars, only allow alphanumeric and dashes
        // remove forward slashes, question marks, equal signs and hashes in case
        // multibyte is allowed (and non-ASCII chars aren't removed)
        '/[\/\?=#]+/u' => '-',
        '/[\-]{2,}/u' => '-', // remove duplicate dashes
        '/^[\-]+/u' => '', // remove all leading dashes
        '/[\-]+$/u' => '' // remove all trailing dashes
    );

    /**
     * Doesn't try to replace or transliterate non-ASCII characters.
     * Useful for character sets that have little overlap with ASCII (e.g. far eastern),
     * as well as better search engine optimization for URLs.
     * @see http://www.ietf.org/rfc/rfc3987
     *
     * @config
     * @var boolean
     */
    private static $default_allow_multibyte = false;

    /**
     * @var array See {@link setReplacements()}
     */
    public $replacements = array();

    /**
     * Lowercase the given name and run it through the configured replacement rules.
     *
     * When multibyte characters are not allowed, the name is first passed through
     * the transliterator (if one is available). When they are allowed, the rule that
     * strips non-ASCII characters is skipped and the result is percent-encoded instead.
     *
     * Note: Depending on the applied replacement rules, this method might result in an empty string.
     *
     * @param string $name URL path (without domain or query parameters), in utf8 encoding
     * @return string A filtered path compatible with RFC 3986
     */
    public function filter($name)
    {
        $allowMultibyte = $this->getAllowMultibyte();

        if (!$allowMultibyte) {
            // Only transliterate when no multibyte support is requested
            $transliterator = $this->getTransliterator();
            if ($transliterator) {
                $name = $transliterator->toASCII($name);
            }
        }

        // The input is documented as UTF-8 and every rule uses the /u modifier, so
        // don't let the global mbstring internal encoding influence lowercasing.
        $name = mb_strtolower($name, 'UTF-8');

        $replacements = $this->getReplacements();

        // Unset automated removal of non-ASCII characters, and don't try to transliterate
        if ($allowMultibyte && isset($replacements['/[^A-Za-z0-9\-]+/u'])) {
            unset($replacements['/[^A-Za-z0-9\-]+/u']);
        }

        foreach ($replacements as $regex => $replace) {
            $name = preg_replace($regex, $replace, $name);
        }

        // Multibyte URLs require percent encoding to comply to RFC 3986.
        // Without this setting, the "remove non-ASCII chars" regex takes care of that.
        if ($allowMultibyte) {
            $name = rawurlencode($name);
        }

        return $name;
    }

    /**
     * @param array $r Map of find/replace used for preg_replace().
     */
    public function setReplacements($r)
    {
        $this->replacements = $r;
    }

    /**
     * Returns the instance replacements, or the configured defaults when the
     * instance map is empty (an empty array falls back to the defaults as well).
     *
     * @return array
     */
    public function getReplacements()
    {
        return ($this->replacements) ? $this->replacements : (array)$this->config()->default_replacements;
    }

    /**
     * @var Transliterator
     */
    protected $transliterator;

    /**
     * Returns the transliterator, creating one on first use if the
     * default_use_transliterator config is enabled. May return null otherwise.
     *
     * @return Transliterator
     */
    public function getTransliterator()
    {
        if ($this->transliterator === null && $this->config()->default_use_transliterator) {
            $this->transliterator = Transliterator::create();
        }
        return $this->transliterator;
    }

    /**
     * @param Transliterator $t
     */
    public function setTransliterator($t)
    {
        $this->transliterator = $t;
    }

    /**
     * Instance-level override; null means "use the default_allow_multibyte config".
     *
     * @var boolean
     */
    protected $allowMultibyte;

    /**
     * @param boolean $bool
     */
    public function setAllowMultibyte($bool)
    {
        $this->allowMultibyte = $bool;
    }

    /**
     * Returns the instance setting if one was set, otherwise the configured default.
     *
     * @return boolean
     */
    public function getAllowMultibyte()
    {
        return ($this->allowMultibyte !== null) ? $this->allowMultibyte : $this->config()->default_allow_multibyte;
    }
}