2010-10-15 03:03:43 +02:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
/**
 * Support class for converting unicode strings into a suitable 7-bit ASCII equivalent.
 *
 * Usage:
 *
 * <code>
 * $tr = new SS_Transliterator();
 * $ascii = $tr->toASCII($unicode);
 * </code>
 *
 * @package framework
 * @subpackage model
 */
class SS_Transliterator extends Object {

	/**
	 * @config
	 * @var boolean Allow the use of iconv() to perform transliteration. Set to false to disable.
	 * Even if this variable is true, iconv() won't be used if it's not installed.
	 */
	private static $use_iconv = false;

	/**
	 * Convert the given utf8 string to a safe ASCII source.
	 *
	 * Delegates to {@link useIconv()} when iconv() is installed and the
	 * `use_iconv` config flag is enabled; otherwise uses {@link useStrTr()}.
	 *
	 * @param string $source UTF-8 encoded input
	 * @return string Transliterated string
	 */
	public function toASCII($source) {
		if(function_exists('iconv') && $this->config()->use_iconv) return $this->useIconv($source);
		else return $this->useStrTr($source);
	}

	/**
	 * Transliteration using strtr() and a lookup table.
	 *
	 * Only characters that have an entry in the table are replaced; any other
	 * character (including non-ASCII ones without a mapping) is passed through
	 * unchanged.
	 *
	 * @param string $source UTF-8 encoded input
	 * @return string Input with every mapped character replaced
	 */
	protected function useStrTr($source) {
		$table = array(
			// Latin: Western and Central European
			'Š'=>'S', 'š'=>'s', 'Đ'=>'Dj', 'đ'=>'dj', 'Ž'=>'Z', 'ž'=>'z', 'Č'=>'C', 'č'=>'c', 'Ć'=>'C', 'ć'=>'c',
			'À'=>'A', 'Á'=>'A', 'Â'=>'A', 'Ã'=>'A', 'Ä'=>'Ae', 'Å'=>'A', 'Æ'=>'A', 'Ç'=>'C', 'È'=>'E', 'É'=>'E',
			'Ê'=>'E', 'Ë'=>'E', 'Ì'=>'I', 'Í'=>'I', 'Î'=>'I', 'Ï'=>'I', 'Ñ'=>'N', 'Ò'=>'O', 'Ó'=>'O', 'Ô'=>'O',
			'Õ'=>'O', 'Ö'=>'Oe', 'Ø'=>'O', 'Ù'=>'U', 'Ú'=>'U', 'Û'=>'U', 'Ü'=>'Ue', 'Ý'=>'Y', 'Þ'=>'B', 'ß'=>'ss',
			'à'=>'a', 'á'=>'a', 'â'=>'a', 'ã'=>'a', 'ä'=>'ae', 'å'=>'a', 'æ'=>'ae', 'ç'=>'c', 'è'=>'e', 'é'=>'e',
			'ê'=>'e', 'ë'=>'e', 'ì'=>'i', 'í'=>'i', 'î'=>'i', 'ï'=>'i', 'ð'=>'o', 'ñ'=>'n', 'ò'=>'o', 'ó'=>'o',
			'ô'=>'o', 'õ'=>'o', 'ö'=>'oe', 'ø'=>'o', 'ù'=>'u', 'ú'=>'u', 'û'=>'u', 'ü'=>'ue', 'ý'=>'y',
			'þ'=>'b', 'ÿ'=>'y', 'Ŕ'=>'R', 'ŕ'=>'r',
			'Ā'=>'A', 'ā'=>'a', 'Ē'=>'E', 'ē'=>'e', 'Ī'=>'I', 'ī'=>'i', 'Ō'=>'O', 'ō'=>'o', 'Ū'=>'U', 'ū'=>'u',
			'œ'=>'oe', 'Œ'=>'Oe',
			// IJ ligatures, written as explicit UTF-8 bytes so the keys cannot be
			// confused with the plain ASCII letter pairs "ij" / "IJ"
			"\xC4\xB3"=>'ij', // U+0133 LATIN SMALL LIGATURE IJ
			"\xC4\xB2"=>'IJ', // U+0132 LATIN CAPITAL LIGATURE IJ
			'ą'=>'a','ę'=>'e', 'ė'=>'e', 'į'=>'i','ų'=>'u', 'Ą'=>'A',
			'Ę'=>'E', 'Ė'=>'E', 'Į'=>'I','Ų'=>'U',
			"ľ"=>"l", "Ľ"=>"L", "ť"=>"t", "Ť"=>"T", "ů"=>"u", "Ů"=>"U",
			'ł'=>'l', 'Ł'=>'L', 'ń'=>'n', 'Ń'=>'N', 'ś'=>'s', 'Ś'=>'S', 'ź'=>'z', 'Ź'=>'Z', 'ż'=>'z', 'Ż'=>'Z',
			// Cyrillic
			'а'=>"a",'б'=>"b",'в'=>"v",'г'=>"g",'д'=>"d",'е'=>"e",'ё'=>"yo",'ж'=>"zh",'з'=>"z",'и'=>"i",
			'й'=>"y",'к'=>"k",'л'=>"l",'м'=>"m",'н'=>"n",'о'=>"o",'п'=>"p",'р'=>"r",'с'=>"s",'т'=>"t",
			'у'=>"u",'ф'=>"f",'х'=>"kh",'ц'=>"ts",'ч'=>"ch",'ш'=>"sh",'щ'=>"shch",'ы'=>"y",'э'=>"e",'ю'=>"yu",
			'я'=>"ya",
			'А'=>"A",'Б'=>"B",'В'=>"V",'Г'=>"G",'Д'=>"D",'Е'=>"E",'Ё'=>"YO",'Ж'=>"ZH",'З'=>"Z",'И'=>"I",
			'Й'=>"Y",'К'=>"K",'Л'=>"L",'М'=>"M",'Н'=>"N",'О'=>"O",'П'=>"P",'Р'=>"R",'С'=>"S",'Т'=>"T",
			'У'=>"U",'Ф'=>"F",'Х'=>"KH",'Ц'=>"TS",'Ч'=>"CH",'Ш'=>"SH",'Щ'=>"SHCH",'Ы'=>"Y",'Э'=>"E",'Ю'=>"YU",
			'Я'=>"YA",
			// Greek
			'α'=>'a', 'Α'=>'A', 'ά'=>'a', 'Ά'=>'A', 'β'=>'v', 'Β'=>'V', 'γ'=>'g', 'Γ'=>'G', 'δ'=>'d', 'Δ'=>'D',
			'ε'=>'e', 'ϵ'=>'e', 'Ε'=>'E', 'έ'=>'e', 'Έ'=>'E', 'ζ'=>'z', 'Ζ'=>'Z', 'η'=>'i', 'Η'=>'I',
			'θ'=>'th', 'ϑ' => 'th', 'Θ'=>'TH', 'ι'=>'i', 'Ι'=>'I', 'ί'=>'i', 'Ί'=>'I', 'κ'=>'k', 'ϰ'=>'k', 'Κ'=>'K',
			'λ'=>'l', 'Λ'=>'L', 'μ'=>'m', 'Μ'=>'M', 'ν'=>'n', 'Ν'=>'N', 'ή'=>'n', 'Ή'=>'N', 'ἠ'=>'n', 'Ἠ'=>'N',
			'ο'=>'o', 'Ο'=>'O', 'ό'=>'o', 'Ό'=>'O', 'π'=>'p', 'Π'=>'P', 'ρ'=>'r', 'ϱ'=>'r', 'Ρ'=>'R', 'ῤ'=>'rh',
			'σ'=>'s', 'ς'=>'s', 'Σ'=>'S', 'τ'=>'t', 'Τ'=>'T', 'υ'=>'y', 'Υ'=>'Y', 'ύ'=>'y', 'Ύ'=>'Y', 'ὐ'=>'y',
			'φ'=>'f', 'ϕ'=>'f', 'Φ'=>'F', 'χ'=>'ch', 'Χ'=>'CH', 'ψ'=>'ps', 'Ψ'=>'PS', 'ξ'=>'x', 'Ξ'=>'X',
			'ω'=>'w', 'Ω'=>'W', 'ώ'=>'o', 'Ώ'=>'O', 'ὠ'=>'o', 'Ὠ'=>'O',
		);

		// The array form of strtr() replaces each key wherever it occurs and
		// never re-scans text it has already substituted.
		return strtr($source, $table);
	}

	/**
	 * Transliteration using iconv()
	 *
	 * @param string $source UTF-8 encoded input
	 * @return string Result of the iconv() conversion, or the lookup-table
	 *                transliteration of $source if iconv() reports failure
	 */
	protected function useIconv($source) {
		$converted = iconv("utf-8", "us-ascii//IGNORE//TRANSLIT", $source);

		// iconv() returns false when the conversion fails. Fall back to the
		// lookup table so callers always receive a string.
		if($converted === false) return $this->useStrTr($source);

		return $converted;
	}
}
|