Merge pull request #35 from hafriedlander/solr4
API Add support for Solr 4 and make it the default
This commit is contained in:
commit
efdc96e937
|
@ -11,9 +11,13 @@ class Solr {
|
||||||
* path (default: /solr) - The suburl the solr service is available on
|
* path (default: /solr) - The suburl the solr service is available on
|
||||||
*
|
*
|
||||||
* Optional fields:
|
* Optional fields:
|
||||||
* extraspath (default: <basefolder>/fulltextsearch/conf/extras/) - Absolute path to
|
* version (default: 4) - The Solr server version. Currently supports 3 and 4 (you can add a sub-version like 4.5 if
|
||||||
|
* you like, but currently it has no effect)
|
||||||
|
* service (default: depends on version, Solr3Service for 3, Solr4Service for 4)
|
||||||
|
* the class that provides actual communication to the Solr server
|
||||||
|
* extraspath (default: <basefolder>/fulltextsearch/conf/solr/{version}/extras/) - Absolute path to
|
||||||
* the folder containing templates which are used for generating the schema and field definitions.
|
* the folder containing the configuration default files, e.g. solrconfig.xml.
|
||||||
* templates (default: <basefolder>/fulltextsearch/conf/templates/) - Absolute path to
|
* templatespath (default: <basefolder>/fulltextsearch/conf/solr/{version}/templates/) - Absolute path to
|
||||||
* the configuration default files, e.g. solrconfig.xml.
|
* the folder containing templates which are used for generating the schema and field definitions.
|
||||||
*
|
*
|
||||||
* indexstore => an array with
|
* indexstore => an array with
|
||||||
|
@ -29,36 +33,87 @@ class Solr {
|
||||||
* path (default: /solrindex) - The suburl on the solr host that is set up to accept index configurations via webdav
|
* path (default: /solrindex) - The suburl on the solr host that is set up to accept index configurations via webdav
|
||||||
* remotepath - The path that the Solr server will read the index configurations from
|
* remotepath - The path that the Solr server will read the index configurations from
|
||||||
*/
|
*/
|
||||||
static $solr_options = array();
|
protected static $solr_options = array();
|
||||||
|
|
||||||
|
/** A cache of solr_options with the defaults all merged in */
|
||||||
|
protected static $merged_solr_options = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the configuration for Solr. See $solr_options for a discussion of the accepted array keys
|
||||||
|
* @param array $options - The options to update
|
||||||
|
*/
|
||||||
static function configure_server($options = array()) {
|
static function configure_server($options = array()) {
|
||||||
self::$solr_options = array_merge(array(
|
self::$solr_options = array_merge(self::$solr_options, $options);
|
||||||
|
self::$merged_solr_options = null;
|
||||||
|
|
||||||
|
self::$service_singleton = null;
|
||||||
|
self::$service_core_singletons = array();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the configured Solr options with the defaults all merged in
|
||||||
|
* @return array - The merged options
|
||||||
|
*/
|
||||||
|
static function solr_options() {
|
||||||
|
if (self::$merged_solr_options) return self::$merged_solr_options;
|
||||||
|
|
||||||
|
$defaults = array(
|
||||||
'host' => 'localhost',
|
'host' => 'localhost',
|
||||||
'port' => 8983,
|
'port' => 8983,
|
||||||
'path' => '/solr',
|
'path' => '/solr',
|
||||||
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/extras/',
|
'version' => '4'
|
||||||
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/templates/',
|
);
|
||||||
), self::$solr_options, $options);
|
|
||||||
}
|
|
||||||
|
|
||||||
static protected $service_class = 'SolrService';
|
// Build some by-version defaults
|
||||||
|
$version = isset(self::$solr_options['version']) ? self::$solr_options['version'] : $defaults['version'];
|
||||||
|
|
||||||
static function set_service_class($class) {
|
if (version_compare($version, '4', '>=')){
|
||||||
self::$service_class = $class;
|
$versionDefaults = array(
|
||||||
self::$service = null;
|
'service' => 'Solr4Service',
|
||||||
}
|
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/solr/4/extras/',
|
||||||
|
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/solr/4/templates/',
|
||||||
static protected $service = null;
|
);
|
||||||
|
}
|
||||||
static function service($core = null) {
|
else {
|
||||||
if (!self::$service) {
|
$versionDefaults = array(
|
||||||
if (!self::$solr_options) user_error('No configuration for Solr server provided', E_USER_ERROR);
|
'service' => 'Solr3Service',
|
||||||
|
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/solr/3/extras/',
|
||||||
$class = self::$service_class;
|
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/solr/3/templates/',
|
||||||
self::$service = new $class(self::$solr_options['host'], self::$solr_options['port'], self::$solr_options['path']);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $core ? self::$service->serviceForCore($core) : self::$service;
|
return (self::$merged_solr_options = array_merge($defaults, $versionDefaults, self::$solr_options));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static function set_service_class($class) {
|
||||||
|
user_error('set_service_class is deprecated - pass as part of $options to configure_server', E_USER_WARNING);
|
||||||
|
self::configure_server(array('service' => $class));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @var SolrService | null - The instance of SolrService for core management */
|
||||||
|
static protected $service_singleton = null;
|
||||||
|
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
|
||||||
|
static protected $service_core_singletons = array();
|
||||||
|
|
||||||
|
static function service($core = null) {
|
||||||
|
$options = self::solr_options();
|
||||||
|
|
||||||
|
if (!self::$service_singleton) {
|
||||||
|
self::$service_singleton = Object::create(
|
||||||
|
$options['service'], $options['host'], $options['port'], $options['path']
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($core) {
|
||||||
|
if (!isset(self::$service_core_singletons[$core])) {
|
||||||
|
self::$service_core_singletons[$core] = self::$service_singleton->serviceForCore($core);
|
||||||
|
}
|
||||||
|
|
||||||
|
return self::$service_core_singletons[$core];
|
||||||
|
} else {
|
||||||
|
return self::$service_singleton;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static function get_indexes() {
|
static function get_indexes() {
|
||||||
|
@ -66,8 +121,8 @@ class Solr {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Include the thirdparty Solr client api library. Done this way to avoid issues where code is called in mysite/_config
|
* Include the thirdparty Solr client api library. Done this way to avoid issues where code is called in
|
||||||
* before fulltextsearch/_config has a change to update the include path.
|
* mysite/_config before fulltextsearch/_config has a chance to update the include path.
|
||||||
*/
|
*/
|
||||||
static function include_client_api() {
|
static function include_client_api() {
|
||||||
static $included = false;
|
static $included = false;
|
||||||
|
@ -88,8 +143,9 @@ class Solr_Configure extends BuildTask {
|
||||||
public function run($request) {
|
public function run($request) {
|
||||||
$service = Solr::service();
|
$service = Solr::service();
|
||||||
$indexes = Solr::get_indexes();
|
$indexes = Solr::get_indexes();
|
||||||
|
$options = Solr::solr_options();
|
||||||
|
|
||||||
if (!isset(Solr::$solr_options['indexstore']) || !($indexstore = Solr::$solr_options['indexstore'])) {
|
if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
|
||||||
user_error('No index configuration for Solr provided', E_USER_ERROR);
|
user_error('No index configuration for Solr provided', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Per-core service used when talking to a Solr 3 server.
 *
 * Declares no members of its own, so it behaves exactly like SolrService_Core.
 */
class Solr3Service_Core extends SolrService_Core
{
}
|
||||||
|
|
||||||
|
/**
 * Server-level service for a Solr 3 installation.
 */
class Solr3Service extends SolrService
{
	/**
	 * Name of the class that SolrService::serviceForCore() instantiates for each core.
	 * It is looked up through Config using the called class, so it stays an untyped private static.
	 */
	private static $core_class = 'Solr3Service_Core';
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Per-core service used when talking to a Solr 4 server.
 */
class Solr4Service_Core extends SolrService_Core {

	/**
	 * Send a commit command to this core.
	 *
	 * Replaces the underlying commit function so that the waitFlush attribute is never sent: it has been
	 * deprecated since Solr 4.0, and Solr 4.4 throws errors if you pass it.
	 *
	 * @param boolean $expungeDeletes - Sent as the expungeDeletes attribute ("true" or "false")
	 * @param boolean|null $waitFlush - Must be left falsy; a truthy value raises an E_USER_ERROR
	 * @param boolean $waitSearcher - Sent as the waitSearcher attribute ("true" or "false")
	 * @param mixed $timeout - Passed straight through to _sendRawPost (presumably seconds - confirm against
	 *                         the Solr client library)
	 * @return mixed - Whatever _sendRawPost returns (presumably an Apache_Solr_Response - confirm)
	 */
	public function commit($expungeDeletes = false, $waitFlush = null, $waitSearcher = true, $timeout = 3600) {
		if ($waitFlush) {
			// The error level must be the second argument. It was previously concatenated onto the message,
			// which raised the default E_USER_NOTICE (with "256" appended to the text) and let the commit
			// carry on regardless.
			user_error('waitFlush must be false when using Solr 4.0+', E_USER_ERROR);
		}

		$expungeValue = $expungeDeletes ? 'true' : 'false';
		$searcherValue = $waitSearcher ? 'true' : 'false';

		// Build the commit message by hand so that no waitFlush attribute is included
		$rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitSearcher="' . $searcherValue . '" />';
		return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
	}
}
|
||||||
|
|
||||||
|
/**
 * Server-level service for a Solr 4 installation.
 */
class Solr4Service extends SolrService
{
	/**
	 * Name of the class that SolrService::serviceForCore() instantiates for each core.
	 * It is looked up through Config using the called class, so it stays an untyped private static.
	 */
	private static $core_class = 'Solr4Service_Core';
}
|
||||||
|
|
|
@ -80,10 +80,12 @@ class SolrConfigStore_File implements SolrConfigStore {
|
||||||
*/
|
*/
|
||||||
class SolrConfigStore_WebDAV implements SolrConfigStore {
|
class SolrConfigStore_WebDAV implements SolrConfigStore {
|
||||||
function __construct($config) {
|
function __construct($config) {
|
||||||
|
$options = Solr::solr_options();
|
||||||
|
|
||||||
$this->url = implode('', array(
|
$this->url = implode('', array(
|
||||||
'http://',
|
'http://',
|
||||||
isset($config['auth']) ? $config['auth'].'@' : '',
|
isset($config['auth']) ? $config['auth'].'@' : '',
|
||||||
Solr::$solr_options['host'] . ':' . Solr::$solr_options['port'],
|
$options['host'].':'.$options['port'],
|
||||||
$config['path']
|
$config['path']
|
||||||
));
|
));
|
||||||
$this->remote = $config['remotepath'];
|
$this->remote = $config['remotepath'];
|
||||||
|
|
|
@ -36,7 +36,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||||
* templates which are used for generating the schema and field definitions.
|
* templates which are used for generating the schema and field definitions.
|
||||||
*/
|
*/
|
||||||
function getTemplatesPath() {
|
function getTemplatesPath() {
|
||||||
return $this->templatesPath ? $this->templatesPath : Solr::$solr_options['templatespath'];
|
$globalOptions = Solr::solr_options();
|
||||||
|
return $this->templatesPath ? $this->templatesPath : $globalOptions['templatespath'];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -44,7 +45,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||||
* e.g. solrconfig.xml.
|
* e.g. solrconfig.xml.
|
||||||
*/
|
*/
|
||||||
function getExtrasPath() {
|
function getExtrasPath() {
|
||||||
return $this->extrasPath ? $this->extrasPath : Solr::$solr_options['extraspath'];
|
$globalOptions = Solr::solr_options();
|
||||||
|
return $this->extrasPath ? $this->extrasPath : $globalOptions['extraspath'];
|
||||||
}
|
}
|
||||||
|
|
||||||
function generateSchema() {
|
function generateSchema() {
|
||||||
|
|
|
@ -2,10 +2,22 @@
|
||||||
|
|
||||||
Solr::include_client_api();
|
Solr::include_client_api();
|
||||||
|
|
||||||
class SolrService extends Apache_Solr_Service {
|
/**
|
||||||
|
* The API for accessing a specific core of a Solr server. Exactly the same as Apache_Solr_Service for now.
|
||||||
|
*/
|
||||||
|
class SolrService_Core extends Apache_Solr_Service {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The API for accessing the primary Solr installation, which includes both SolrService_Core,
|
||||||
|
* plus extra methods for interrogating, creating, reloading and getting SolrService_Core instances
|
||||||
|
* for Solr cores.
|
||||||
|
*/
|
||||||
|
class SolrService extends SolrService_Core {
|
||||||
|
private static $core_class = 'SolrService_Core';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Apache_Solr_Response
|
* Handle encoding the GET parameters and making the HTTP call to execute a core command
|
||||||
*/
|
*/
|
||||||
protected function coreCommand($command, $core, $params=array()) {
|
protected function coreCommand($command, $core, $params=array()) {
|
||||||
$command = strtoupper($command);
|
$command = strtoupper($command);
|
||||||
|
@ -17,7 +29,9 @@ class SolrService extends Apache_Solr_Service {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return boolean
|
* Is the passed core active?
|
||||||
|
* @param $core string - The name of the core
|
||||||
|
* @return boolean - True if that core exists & is active
|
||||||
*/
|
*/
|
||||||
public function coreIsActive($core) {
|
public function coreIsActive($core) {
|
||||||
$result = $this->coreCommand('STATUS', $core);
|
$result = $this->coreCommand('STATUS', $core);
|
||||||
|
@ -25,6 +39,12 @@ class SolrService extends Apache_Solr_Service {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Create a new core
|
||||||
|
* @param $core string - The name of the core
|
||||||
|
* @param $instancedir string - The base path of the core on the server
|
||||||
|
* @param $config string - The filename of solrconfig.xml on the server. Default is $instancedir/solrconfig.xml
|
||||||
|
* @param $schema string - The filename of schema.xml on the server. Default is $instancedir/schema.xml
|
||||||
|
* @param $datadir string - The path to store data for this core on the server. Default depends on solrconfig.xml
|
||||||
* @return Apache_Solr_Response
|
* @return Apache_Solr_Response
|
||||||
*/
|
*/
|
||||||
public function coreCreate($core, $instancedir, $config=null, $schema=null, $datadir=null) {
|
public function coreCreate($core, $instancedir, $config=null, $schema=null, $datadir=null) {
|
||||||
|
@ -37,19 +57,21 @@ class SolrService extends Apache_Solr_Service {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Reload a core
|
||||||
|
* @param $core string - The name of the core
|
||||||
* @return Apache_Solr_Response
|
* @return Apache_Solr_Response
|
||||||
*/
|
*/
|
||||||
public function coreReload($core) {
|
public function coreReload($core) {
|
||||||
return $this->coreCommand('RELOAD', $core);
|
return $this->coreCommand('RELOAD', $core);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected $_serviceCache = array();
|
/**
|
||||||
|
* Create a new SolrService_Core instance (of the class named by the core_class config) for the passed core
|
||||||
|
* @param $core string - The name of the core
|
||||||
|
* @return SolrService_Core
|
||||||
|
*/
|
||||||
public function serviceForCore($core) {
|
public function serviceForCore($core) {
|
||||||
if (!isset($this->_serviceCache[$core])) {
|
$klass = Config::inst()->get(get_called_class(), 'core_class');
|
||||||
$this->_serviceCache[$core] = new Apache_Solr_Service($this->_host, $this->_port, $this->_path."$core", $this->_httpTransport);
|
return new $klass($this->_host, $this->_port, $this->_path.$core, $this->_httpTransport);
|
||||||
}
|
|
||||||
|
|
||||||
return $this->_serviceCache[$core];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- If this file is found in the config directory, it will only be
|
||||||
|
loaded once at startup. If it is found in Solr's data
|
||||||
|
directory, it will be re-loaded every commit.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<elevate>
|
||||||
|
<query text="foo bar">
|
||||||
|
<doc id="1" />
|
||||||
|
<doc id="2" />
|
||||||
|
<doc id="3" />
|
||||||
|
</query>
|
||||||
|
|
||||||
|
<query text="ipod">
|
||||||
|
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
|
||||||
|
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
|
||||||
|
</query>
|
||||||
|
|
||||||
|
</elevate>
|
|
@ -0,0 +1,3813 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
# This map converts alphabetic, numeric, and symbolic Unicode characters
|
||||||
|
# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
|
||||||
|
# block) into their ASCII equivalents, if one exists.
|
||||||
|
#
|
||||||
|
# Characters from the following Unicode blocks are converted; however, only
|
||||||
|
# those characters with reasonable ASCII alternatives are converted:
|
||||||
|
#
|
||||||
|
# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
|
||||||
|
# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
|
||||||
|
# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
|
||||||
|
# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
|
||||||
|
# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
|
||||||
|
# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
|
||||||
|
# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
|
||||||
|
# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
|
||||||
|
# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
|
||||||
|
# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
|
||||||
|
# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
|
||||||
|
# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
|
||||||
|
# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
|
||||||
|
# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
|
||||||
|
# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
|
||||||
|
# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
|
||||||
|
#
|
||||||
|
# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
|
||||||
|
#
|
||||||
|
# The set of character conversions supported by this map is a superset of
|
||||||
|
# those supported by the map represented by mapping-ISOLatin1Accent.txt.
|
||||||
|
#
|
||||||
|
# See the bottom of this file for the Perl script used to generate the contents
|
||||||
|
# of this file (without this header) from ASCIIFoldingFilter.java.
|
||||||
|
|
||||||
|
|
||||||
|
# Syntax:
|
||||||
|
# "source" => "target"
|
||||||
|
# "source".length() > 0 (source cannot be empty.)
|
||||||
|
# "target".length() >= 0 (target can be empty.)
|
||||||
|
|
||||||
|
|
||||||
|
# À [LATIN CAPITAL LETTER A WITH GRAVE]
|
||||||
|
"\u00C0" => "A"
|
||||||
|
|
||||||
|
# Á [LATIN CAPITAL LETTER A WITH ACUTE]
|
||||||
|
"\u00C1" => "A"
|
||||||
|
|
||||||
|
# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
|
||||||
|
"\u00C2" => "A"
|
||||||
|
|
||||||
|
# Ã [LATIN CAPITAL LETTER A WITH TILDE]
|
||||||
|
"\u00C3" => "A"
|
||||||
|
|
||||||
|
# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
|
||||||
|
"\u00C4" => "A"
|
||||||
|
|
||||||
|
# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
|
||||||
|
"\u00C5" => "A"
|
||||||
|
|
||||||
|
# Ā [LATIN CAPITAL LETTER A WITH MACRON]
|
||||||
|
"\u0100" => "A"
|
||||||
|
|
||||||
|
# Ă [LATIN CAPITAL LETTER A WITH BREVE]
|
||||||
|
"\u0102" => "A"
|
||||||
|
|
||||||
|
# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
|
||||||
|
"\u0104" => "A"
|
||||||
|
|
||||||
|
# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
|
||||||
|
"\u018F" => "A"
|
||||||
|
|
||||||
|
# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
|
||||||
|
"\u01CD" => "A"
|
||||||
|
|
||||||
|
# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
|
||||||
|
"\u01DE" => "A"
|
||||||
|
|
||||||
|
# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
|
||||||
|
"\u01E0" => "A"
|
||||||
|
|
||||||
|
# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
|
||||||
|
"\u01FA" => "A"
|
||||||
|
|
||||||
|
# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
|
||||||
|
"\u0200" => "A"
|
||||||
|
|
||||||
|
# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
|
||||||
|
"\u0202" => "A"
|
||||||
|
|
||||||
|
# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
|
||||||
|
"\u0226" => "A"
|
||||||
|
|
||||||
|
# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
|
||||||
|
"\u023A" => "A"
|
||||||
|
|
||||||
|
# ᴀ [LATIN LETTER SMALL CAPITAL A]
|
||||||
|
"\u1D00" => "A"
|
||||||
|
|
||||||
|
# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
|
||||||
|
"\u1E00" => "A"
|
||||||
|
|
||||||
|
# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
|
||||||
|
"\u1EA0" => "A"
|
||||||
|
|
||||||
|
# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
|
||||||
|
"\u1EA2" => "A"
|
||||||
|
|
||||||
|
# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1EA4" => "A"
|
||||||
|
|
||||||
|
# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1EA6" => "A"
|
||||||
|
|
||||||
|
# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1EA8" => "A"
|
||||||
|
|
||||||
|
# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1EAA" => "A"
|
||||||
|
|
||||||
|
# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1EAC" => "A"
|
||||||
|
|
||||||
|
# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
|
||||||
|
"\u1EAE" => "A"
|
||||||
|
|
||||||
|
# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
|
||||||
|
"\u1EB0" => "A"
|
||||||
|
|
||||||
|
# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
|
||||||
|
"\u1EB2" => "A"
|
||||||
|
|
||||||
|
# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
|
||||||
|
"\u1EB4" => "A"
|
||||||
|
|
||||||
|
# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
|
||||||
|
"\u1EB6" => "A"
|
||||||
|
|
||||||
|
# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
|
||||||
|
"\u24B6" => "A"
|
||||||
|
|
||||||
|
# Ａ [FULLWIDTH LATIN CAPITAL LETTER A]
|
||||||
|
"\uFF21" => "A"
|
||||||
|
|
||||||
|
# à [LATIN SMALL LETTER A WITH GRAVE]
|
||||||
|
"\u00E0" => "a"
|
||||||
|
|
||||||
|
# á [LATIN SMALL LETTER A WITH ACUTE]
|
||||||
|
"\u00E1" => "a"
|
||||||
|
|
||||||
|
# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
|
||||||
|
"\u00E2" => "a"
|
||||||
|
|
||||||
|
# ã [LATIN SMALL LETTER A WITH TILDE]
|
||||||
|
"\u00E3" => "a"
|
||||||
|
|
||||||
|
# ä [LATIN SMALL LETTER A WITH DIAERESIS]
|
||||||
|
"\u00E4" => "a"
|
||||||
|
|
||||||
|
# å [LATIN SMALL LETTER A WITH RING ABOVE]
|
||||||
|
"\u00E5" => "a"
|
||||||
|
|
||||||
|
# ā [LATIN SMALL LETTER A WITH MACRON]
|
||||||
|
"\u0101" => "a"
|
||||||
|
|
||||||
|
# ă [LATIN SMALL LETTER A WITH BREVE]
|
||||||
|
"\u0103" => "a"
|
||||||
|
|
||||||
|
# ą [LATIN SMALL LETTER A WITH OGONEK]
|
||||||
|
"\u0105" => "a"
|
||||||
|
|
||||||
|
# ǎ [LATIN SMALL LETTER A WITH CARON]
|
||||||
|
"\u01CE" => "a"
|
||||||
|
|
||||||
|
# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
|
||||||
|
"\u01DF" => "a"
|
||||||
|
|
||||||
|
# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
|
||||||
|
"\u01E1" => "a"
|
||||||
|
|
||||||
|
# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
|
||||||
|
"\u01FB" => "a"
|
||||||
|
|
||||||
|
# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
|
||||||
|
"\u0201" => "a"
|
||||||
|
|
||||||
|
# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
|
||||||
|
"\u0203" => "a"
|
||||||
|
|
||||||
|
# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
|
||||||
|
"\u0227" => "a"
|
||||||
|
|
||||||
|
# ɐ [LATIN SMALL LETTER TURNED A]
|
||||||
|
"\u0250" => "a"
|
||||||
|
|
||||||
|
# ə [LATIN SMALL LETTER SCHWA]
|
||||||
|
"\u0259" => "a"
|
||||||
|
|
||||||
|
# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
|
||||||
|
"\u025A" => "a"
|
||||||
|
|
||||||
|
# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
|
||||||
|
"\u1D8F" => "a"
|
||||||
|
|
||||||
|
# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
|
||||||
|
"\u1D95" => "a"
|
||||||
|
|
||||||
|
# ḁ [LATIN SMALL LETTER A WITH RING BELOW]
|
||||||
|
"\u1E01" => "a"
|
||||||
|
|
||||||
|
# ẚ [LATIN SMALL LETTER A WITH RIGHT HALF RING]
|
||||||
|
"\u1E9A" => "a"
|
||||||
|
|
||||||
|
# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
|
||||||
|
"\u1EA1" => "a"
|
||||||
|
|
||||||
|
# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
|
||||||
|
"\u1EA3" => "a"
|
||||||
|
|
||||||
|
# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1EA5" => "a"
|
||||||
|
|
||||||
|
# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1EA7" => "a"
|
||||||
|
|
||||||
|
# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1EA9" => "a"
|
||||||
|
|
||||||
|
# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1EAB" => "a"
|
||||||
|
|
||||||
|
# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1EAD" => "a"
|
||||||
|
|
||||||
|
# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
|
||||||
|
"\u1EAF" => "a"
|
||||||
|
|
||||||
|
# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
|
||||||
|
"\u1EB1" => "a"
|
||||||
|
|
||||||
|
# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
|
||||||
|
"\u1EB3" => "a"
|
||||||
|
|
||||||
|
# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
|
||||||
|
"\u1EB5" => "a"
|
||||||
|
|
||||||
|
# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
|
||||||
|
"\u1EB7" => "a"
|
||||||
|
|
||||||
|
# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
|
||||||
|
"\u2090" => "a"
|
||||||
|
|
||||||
|
# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
|
||||||
|
"\u2094" => "a"
|
||||||
|
|
||||||
|
# ⓐ [CIRCLED LATIN SMALL LETTER A]
|
||||||
|
"\u24D0" => "a"
|
||||||
|
|
||||||
|
# ⱥ [LATIN SMALL LETTER A WITH STROKE]
|
||||||
|
"\u2C65" => "a"
|
||||||
|
|
||||||
|
# Ɐ [LATIN CAPITAL LETTER TURNED A]
|
||||||
|
"\u2C6F" => "a"
|
||||||
|
|
||||||
|
# ａ [FULLWIDTH LATIN SMALL LETTER A]
|
||||||
|
"\uFF41" => "a"
|
||||||
|
|
||||||
|
# Ꜳ [LATIN CAPITAL LETTER AA]
|
||||||
|
"\uA732" => "AA"
|
||||||
|
|
||||||
|
# Æ [LATIN CAPITAL LETTER AE]
|
||||||
|
"\u00C6" => "AE"
|
||||||
|
|
||||||
|
# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
|
||||||
|
"\u01E2" => "AE"
|
||||||
|
|
||||||
|
# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
|
||||||
|
"\u01FC" => "AE"
|
||||||
|
|
||||||
|
# ᴁ [LATIN LETTER SMALL CAPITAL AE]
|
||||||
|
"\u1D01" => "AE"
|
||||||
|
|
||||||
|
# Ꜵ [LATIN CAPITAL LETTER AO]
|
||||||
|
"\uA734" => "AO"
|
||||||
|
|
||||||
|
# Ꜷ [LATIN CAPITAL LETTER AU]
|
||||||
|
"\uA736" => "AU"
|
||||||
|
|
||||||
|
# Ꜹ [LATIN CAPITAL LETTER AV]
|
||||||
|
"\uA738" => "AV"
|
||||||
|
|
||||||
|
# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
|
||||||
|
"\uA73A" => "AV"
|
||||||
|
|
||||||
|
# Ꜽ [LATIN CAPITAL LETTER AY]
|
||||||
|
"\uA73C" => "AY"
|
||||||
|
|
||||||
|
# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
|
||||||
|
"\u249C" => "(a)"
|
||||||
|
|
||||||
|
# ꜳ [LATIN SMALL LETTER AA]
|
||||||
|
"\uA733" => "aa"
|
||||||
|
|
||||||
|
# æ [LATIN SMALL LETTER AE]
|
||||||
|
"\u00E6" => "ae"
|
||||||
|
|
||||||
|
# ǣ [LATIN SMALL LETTER AE WITH MACRON]
|
||||||
|
"\u01E3" => "ae"
|
||||||
|
|
||||||
|
# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
|
||||||
|
"\u01FD" => "ae"
|
||||||
|
|
||||||
|
# ᴂ [LATIN SMALL LETTER TURNED AE]
|
||||||
|
"\u1D02" => "ae"
|
||||||
|
|
||||||
|
# ꜵ [LATIN SMALL LETTER AO]
|
||||||
|
"\uA735" => "ao"
|
||||||
|
|
||||||
|
# ꜷ [LATIN SMALL LETTER AU]
|
||||||
|
"\uA737" => "au"
|
||||||
|
|
||||||
|
# ꜹ [LATIN SMALL LETTER AV]
|
||||||
|
"\uA739" => "av"
|
||||||
|
|
||||||
|
# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
|
||||||
|
"\uA73B" => "av"
|
||||||
|
|
||||||
|
# ꜽ [LATIN SMALL LETTER AY]
|
||||||
|
"\uA73D" => "ay"
|
||||||
|
|
||||||
|
# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
|
||||||
|
"\u0181" => "B"
|
||||||
|
|
||||||
|
# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
|
||||||
|
"\u0182" => "B"
|
||||||
|
|
||||||
|
# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
|
||||||
|
"\u0243" => "B"
|
||||||
|
|
||||||
|
# ʙ [LATIN LETTER SMALL CAPITAL B]
|
||||||
|
"\u0299" => "B"
|
||||||
|
|
||||||
|
# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
|
||||||
|
"\u1D03" => "B"
|
||||||
|
|
||||||
|
# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
|
||||||
|
"\u1E02" => "B"
|
||||||
|
|
||||||
|
# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
|
||||||
|
"\u1E04" => "B"
|
||||||
|
|
||||||
|
# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
|
||||||
|
"\u1E06" => "B"
|
||||||
|
|
||||||
|
# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
|
||||||
|
"\u24B7" => "B"
|
||||||
|
|
||||||
|
# Ｂ [FULLWIDTH LATIN CAPITAL LETTER B]
|
||||||
|
"\uFF22" => "B"
|
||||||
|
|
||||||
|
# ƀ [LATIN SMALL LETTER B WITH STROKE]
|
||||||
|
"\u0180" => "b"
|
||||||
|
|
||||||
|
# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
|
||||||
|
"\u0183" => "b"
|
||||||
|
|
||||||
|
# ɓ [LATIN SMALL LETTER B WITH HOOK]
|
||||||
|
"\u0253" => "b"
|
||||||
|
|
||||||
|
# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
|
||||||
|
"\u1D6C" => "b"
|
||||||
|
|
||||||
|
# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
|
||||||
|
"\u1D80" => "b"
|
||||||
|
|
||||||
|
# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
|
||||||
|
"\u1E03" => "b"
|
||||||
|
|
||||||
|
# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
|
||||||
|
"\u1E05" => "b"
|
||||||
|
|
||||||
|
# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
|
||||||
|
"\u1E07" => "b"
|
||||||
|
|
||||||
|
# ⓑ [CIRCLED LATIN SMALL LETTER B]
|
||||||
|
"\u24D1" => "b"
|
||||||
|
|
||||||
|
# ｂ [FULLWIDTH LATIN SMALL LETTER B]
|
||||||
|
"\uFF42" => "b"
|
||||||
|
|
||||||
|
# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
|
||||||
|
"\u249D" => "(b)"
|
||||||
|
|
||||||
|
# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
|
||||||
|
"\u00C7" => "C"
|
||||||
|
|
||||||
|
# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
|
||||||
|
"\u0106" => "C"
|
||||||
|
|
||||||
|
# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
|
||||||
|
"\u0108" => "C"
|
||||||
|
|
||||||
|
# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
|
||||||
|
"\u010A" => "C"
|
||||||
|
|
||||||
|
# Č [LATIN CAPITAL LETTER C WITH CARON]
|
||||||
|
"\u010C" => "C"
|
||||||
|
|
||||||
|
# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
|
||||||
|
"\u0187" => "C"
|
||||||
|
|
||||||
|
# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
|
||||||
|
"\u023B" => "C"
|
||||||
|
|
||||||
|
# ʗ [LATIN LETTER STRETCHED C]
|
||||||
|
"\u0297" => "C"
|
||||||
|
|
||||||
|
# ᴄ [LATIN LETTER SMALL CAPITAL C]
|
||||||
|
"\u1D04" => "C"
|
||||||
|
|
||||||
|
# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
|
||||||
|
"\u1E08" => "C"
|
||||||
|
|
||||||
|
# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
|
||||||
|
"\u24B8" => "C"
|
||||||
|
|
||||||
|
# Ｃ [FULLWIDTH LATIN CAPITAL LETTER C]
|
||||||
|
"\uFF23" => "C"
|
||||||
|
|
||||||
|
# ç [LATIN SMALL LETTER C WITH CEDILLA]
|
||||||
|
"\u00E7" => "c"
|
||||||
|
|
||||||
|
# ć [LATIN SMALL LETTER C WITH ACUTE]
|
||||||
|
"\u0107" => "c"
|
||||||
|
|
||||||
|
# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
|
||||||
|
"\u0109" => "c"
|
||||||
|
|
||||||
|
# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
|
||||||
|
"\u010B" => "c"
|
||||||
|
|
||||||
|
# č [LATIN SMALL LETTER C WITH CARON]
|
||||||
|
"\u010D" => "c"
|
||||||
|
|
||||||
|
# ƈ [LATIN SMALL LETTER C WITH HOOK]
|
||||||
|
"\u0188" => "c"
|
||||||
|
|
||||||
|
# ȼ [LATIN SMALL LETTER C WITH STROKE]
|
||||||
|
"\u023C" => "c"
|
||||||
|
|
||||||
|
# ɕ [LATIN SMALL LETTER C WITH CURL]
|
||||||
|
"\u0255" => "c"
|
||||||
|
|
||||||
|
# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
|
||||||
|
"\u1E09" => "c"
|
||||||
|
|
||||||
|
# ↄ [LATIN SMALL LETTER REVERSED C]
|
||||||
|
"\u2184" => "c"
|
||||||
|
|
||||||
|
# ⓒ [CIRCLED LATIN SMALL LETTER C]
|
||||||
|
"\u24D2" => "c"
|
||||||
|
|
||||||
|
# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
|
||||||
|
"\uA73E" => "c"
|
||||||
|
|
||||||
|
# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
|
||||||
|
"\uA73F" => "c"
|
||||||
|
|
||||||
|
# c [FULLWIDTH LATIN SMALL LETTER C]
|
||||||
|
"\uFF43" => "c"
|
||||||
|
|
||||||
|
# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
|
||||||
|
"\u249E" => "(c)"
|
||||||
|
|
||||||
|
# Ð [LATIN CAPITAL LETTER ETH]
|
||||||
|
"\u00D0" => "D"
|
||||||
|
|
||||||
|
# Ď [LATIN CAPITAL LETTER D WITH CARON]
|
||||||
|
"\u010E" => "D"
|
||||||
|
|
||||||
|
# Đ [LATIN CAPITAL LETTER D WITH STROKE]
|
||||||
|
"\u0110" => "D"
|
||||||
|
|
||||||
|
# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
|
||||||
|
"\u0189" => "D"
|
||||||
|
|
||||||
|
# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
|
||||||
|
"\u018A" => "D"
|
||||||
|
|
||||||
|
# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
|
||||||
|
"\u018B" => "D"
|
||||||
|
|
||||||
|
# ᴅ [LATIN LETTER SMALL CAPITAL D]
|
||||||
|
"\u1D05" => "D"
|
||||||
|
|
||||||
|
# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
|
||||||
|
"\u1D06" => "D"
|
||||||
|
|
||||||
|
# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
|
||||||
|
"\u1E0A" => "D"
|
||||||
|
|
||||||
|
# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
|
||||||
|
"\u1E0C" => "D"
|
||||||
|
|
||||||
|
# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
|
||||||
|
"\u1E0E" => "D"
|
||||||
|
|
||||||
|
# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
|
||||||
|
"\u1E10" => "D"
|
||||||
|
|
||||||
|
# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E12" => "D"
|
||||||
|
|
||||||
|
# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
|
||||||
|
"\u24B9" => "D"
|
||||||
|
|
||||||
|
# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
|
||||||
|
"\uA779" => "D"
|
||||||
|
|
||||||
|
# D [FULLWIDTH LATIN CAPITAL LETTER D]
|
||||||
|
"\uFF24" => "D"
|
||||||
|
|
||||||
|
# ð [LATIN SMALL LETTER ETH]
|
||||||
|
"\u00F0" => "d"
|
||||||
|
|
||||||
|
# ď [LATIN SMALL LETTER D WITH CARON]
|
||||||
|
"\u010F" => "d"
|
||||||
|
|
||||||
|
# đ [LATIN SMALL LETTER D WITH STROKE]
|
||||||
|
"\u0111" => "d"
|
||||||
|
|
||||||
|
# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
|
||||||
|
"\u018C" => "d"
|
||||||
|
|
||||||
|
# ȡ [LATIN SMALL LETTER D WITH CURL]
|
||||||
|
"\u0221" => "d"
|
||||||
|
|
||||||
|
# ɖ [LATIN SMALL LETTER D WITH TAIL]
|
||||||
|
"\u0256" => "d"
|
||||||
|
|
||||||
|
# ɗ [LATIN SMALL LETTER D WITH HOOK]
|
||||||
|
"\u0257" => "d"
|
||||||
|
|
||||||
|
# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
|
||||||
|
"\u1D6D" => "d"
|
||||||
|
|
||||||
|
# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
|
||||||
|
"\u1D81" => "d"
|
||||||
|
|
||||||
|
# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
|
||||||
|
"\u1D91" => "d"
|
||||||
|
|
||||||
|
# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
|
||||||
|
"\u1E0B" => "d"
|
||||||
|
|
||||||
|
# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
|
||||||
|
"\u1E0D" => "d"
|
||||||
|
|
||||||
|
# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
|
||||||
|
"\u1E0F" => "d"
|
||||||
|
|
||||||
|
# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
|
||||||
|
"\u1E11" => "d"
|
||||||
|
|
||||||
|
# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E13" => "d"
|
||||||
|
|
||||||
|
# ⓓ [CIRCLED LATIN SMALL LETTER D]
|
||||||
|
"\u24D3" => "d"
|
||||||
|
|
||||||
|
# ꝺ [LATIN SMALL LETTER INSULAR D]
|
||||||
|
"\uA77A" => "d"
|
||||||
|
|
||||||
|
# d [FULLWIDTH LATIN SMALL LETTER D]
|
||||||
|
"\uFF44" => "d"
|
||||||
|
|
||||||
|
# DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
|
||||||
|
"\u01C4" => "DZ"
|
||||||
|
|
||||||
|
# DZ [LATIN CAPITAL LETTER DZ]
|
||||||
|
"\u01F1" => "DZ"
|
||||||
|
|
||||||
|
# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
|
||||||
|
"\u01C5" => "Dz"
|
||||||
|
|
||||||
|
# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
|
||||||
|
"\u01F2" => "Dz"
|
||||||
|
|
||||||
|
# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
|
||||||
|
"\u249F" => "(d)"
|
||||||
|
|
||||||
|
# ȸ [LATIN SMALL LETTER DB DIGRAPH]
|
||||||
|
"\u0238" => "db"
|
||||||
|
|
||||||
|
# dž [LATIN SMALL LETTER DZ WITH CARON]
|
||||||
|
"\u01C6" => "dz"
|
||||||
|
|
||||||
|
# dz [LATIN SMALL LETTER DZ]
|
||||||
|
"\u01F3" => "dz"
|
||||||
|
|
||||||
|
# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
|
||||||
|
"\u02A3" => "dz"
|
||||||
|
|
||||||
|
# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
|
||||||
|
"\u02A5" => "dz"
|
||||||
|
|
||||||
|
# È [LATIN CAPITAL LETTER E WITH GRAVE]
|
||||||
|
"\u00C8" => "E"
|
||||||
|
|
||||||
|
# É [LATIN CAPITAL LETTER E WITH ACUTE]
|
||||||
|
"\u00C9" => "E"
|
||||||
|
|
||||||
|
# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
|
||||||
|
"\u00CA" => "E"
|
||||||
|
|
||||||
|
# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
|
||||||
|
"\u00CB" => "E"
|
||||||
|
|
||||||
|
# Ē [LATIN CAPITAL LETTER E WITH MACRON]
|
||||||
|
"\u0112" => "E"
|
||||||
|
|
||||||
|
# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
|
||||||
|
"\u0114" => "E"
|
||||||
|
|
||||||
|
# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
|
||||||
|
"\u0116" => "E"
|
||||||
|
|
||||||
|
# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
|
||||||
|
"\u0118" => "E"
|
||||||
|
|
||||||
|
# Ě [LATIN CAPITAL LETTER E WITH CARON]
|
||||||
|
"\u011A" => "E"
|
||||||
|
|
||||||
|
# Ǝ [LATIN CAPITAL LETTER REVERSED E]
|
||||||
|
"\u018E" => "E"
|
||||||
|
|
||||||
|
# Ɛ [LATIN CAPITAL LETTER OPEN E]
|
||||||
|
"\u0190" => "E"
|
||||||
|
|
||||||
|
# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
|
||||||
|
"\u0204" => "E"
|
||||||
|
|
||||||
|
# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
|
||||||
|
"\u0206" => "E"
|
||||||
|
|
||||||
|
# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
|
||||||
|
"\u0228" => "E"
|
||||||
|
|
||||||
|
# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
|
||||||
|
"\u0246" => "E"
|
||||||
|
|
||||||
|
# ᴇ [LATIN LETTER SMALL CAPITAL E]
|
||||||
|
"\u1D07" => "E"
|
||||||
|
|
||||||
|
# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
|
||||||
|
"\u1E14" => "E"
|
||||||
|
|
||||||
|
# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
|
||||||
|
"\u1E16" => "E"
|
||||||
|
|
||||||
|
# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E18" => "E"
|
||||||
|
|
||||||
|
# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
|
||||||
|
"\u1E1A" => "E"
|
||||||
|
|
||||||
|
# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
|
||||||
|
"\u1E1C" => "E"
|
||||||
|
|
||||||
|
# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
|
||||||
|
"\u1EB8" => "E"
|
||||||
|
|
||||||
|
# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
|
||||||
|
"\u1EBA" => "E"
|
||||||
|
|
||||||
|
# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
|
||||||
|
"\u1EBC" => "E"
|
||||||
|
|
||||||
|
# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1EBE" => "E"
|
||||||
|
|
||||||
|
# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1EC0" => "E"
|
||||||
|
|
||||||
|
# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1EC2" => "E"
|
||||||
|
|
||||||
|
# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1EC4" => "E"
|
||||||
|
|
||||||
|
# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1EC6" => "E"
|
||||||
|
|
||||||
|
# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
|
||||||
|
"\u24BA" => "E"
|
||||||
|
|
||||||
|
# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
|
||||||
|
"\u2C7B" => "E"
|
||||||
|
|
||||||
|
# E [FULLWIDTH LATIN CAPITAL LETTER E]
|
||||||
|
"\uFF25" => "E"
|
||||||
|
|
||||||
|
# è [LATIN SMALL LETTER E WITH GRAVE]
|
||||||
|
"\u00E8" => "e"
|
||||||
|
|
||||||
|
# é [LATIN SMALL LETTER E WITH ACUTE]
|
||||||
|
"\u00E9" => "e"
|
||||||
|
|
||||||
|
# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
|
||||||
|
"\u00EA" => "e"
|
||||||
|
|
||||||
|
# ë [LATIN SMALL LETTER E WITH DIAERESIS]
|
||||||
|
"\u00EB" => "e"
|
||||||
|
|
||||||
|
# ē [LATIN SMALL LETTER E WITH MACRON]
|
||||||
|
"\u0113" => "e"
|
||||||
|
|
||||||
|
# ĕ [LATIN SMALL LETTER E WITH BREVE]
|
||||||
|
"\u0115" => "e"
|
||||||
|
|
||||||
|
# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
|
||||||
|
"\u0117" => "e"
|
||||||
|
|
||||||
|
# ę [LATIN SMALL LETTER E WITH OGONEK]
|
||||||
|
"\u0119" => "e"
|
||||||
|
|
||||||
|
# ě [LATIN SMALL LETTER E WITH CARON]
|
||||||
|
"\u011B" => "e"
|
||||||
|
|
||||||
|
# ǝ [LATIN SMALL LETTER TURNED E]
|
||||||
|
"\u01DD" => "e"
|
||||||
|
|
||||||
|
# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
|
||||||
|
"\u0205" => "e"
|
||||||
|
|
||||||
|
# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
|
||||||
|
"\u0207" => "e"
|
||||||
|
|
||||||
|
# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
|
||||||
|
"\u0229" => "e"
|
||||||
|
|
||||||
|
# ɇ [LATIN SMALL LETTER E WITH STROKE]
|
||||||
|
"\u0247" => "e"
|
||||||
|
|
||||||
|
# ɘ [LATIN SMALL LETTER REVERSED E]
|
||||||
|
"\u0258" => "e"
|
||||||
|
|
||||||
|
# ɛ [LATIN SMALL LETTER OPEN E]
|
||||||
|
"\u025B" => "e"
|
||||||
|
|
||||||
|
# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
|
||||||
|
"\u025C" => "e"
|
||||||
|
|
||||||
|
# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
|
||||||
|
"\u025D" => "e"
|
||||||
|
|
||||||
|
# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
|
||||||
|
"\u025E" => "e"
|
||||||
|
|
||||||
|
# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
|
||||||
|
"\u029A" => "e"
|
||||||
|
|
||||||
|
# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
|
||||||
|
"\u1D08" => "e"
|
||||||
|
|
||||||
|
# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
|
||||||
|
"\u1D92" => "e"
|
||||||
|
|
||||||
|
# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
|
||||||
|
"\u1D93" => "e"
|
||||||
|
|
||||||
|
# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
|
||||||
|
"\u1D94" => "e"
|
||||||
|
|
||||||
|
# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
|
||||||
|
"\u1E15" => "e"
|
||||||
|
|
||||||
|
# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
|
||||||
|
"\u1E17" => "e"
|
||||||
|
|
||||||
|
# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E19" => "e"
|
||||||
|
|
||||||
|
# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
|
||||||
|
"\u1E1B" => "e"
|
||||||
|
|
||||||
|
# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
|
||||||
|
"\u1E1D" => "e"
|
||||||
|
|
||||||
|
# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
|
||||||
|
"\u1EB9" => "e"
|
||||||
|
|
||||||
|
# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
|
||||||
|
"\u1EBB" => "e"
|
||||||
|
|
||||||
|
# ẽ [LATIN SMALL LETTER E WITH TILDE]
|
||||||
|
"\u1EBD" => "e"
|
||||||
|
|
||||||
|
# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1EBF" => "e"
|
||||||
|
|
||||||
|
# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1EC1" => "e"
|
||||||
|
|
||||||
|
# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1EC3" => "e"
|
||||||
|
|
||||||
|
# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1EC5" => "e"
|
||||||
|
|
||||||
|
# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1EC7" => "e"
|
||||||
|
|
||||||
|
# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
|
||||||
|
"\u2091" => "e"
|
||||||
|
|
||||||
|
# ⓔ [CIRCLED LATIN SMALL LETTER E]
|
||||||
|
"\u24D4" => "e"
|
||||||
|
|
||||||
|
# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
|
||||||
|
"\u2C78" => "e"
|
||||||
|
|
||||||
|
# e [FULLWIDTH LATIN SMALL LETTER E]
|
||||||
|
"\uFF45" => "e"
|
||||||
|
|
||||||
|
# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
|
||||||
|
"\u24A0" => "(e)"
|
||||||
|
|
||||||
|
# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
|
||||||
|
"\u0191" => "F"
|
||||||
|
|
||||||
|
# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
|
||||||
|
"\u1E1E" => "F"
|
||||||
|
|
||||||
|
# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
|
||||||
|
"\u24BB" => "F"
|
||||||
|
|
||||||
|
# ꜰ [LATIN LETTER SMALL CAPITAL F]
|
||||||
|
"\uA730" => "F"
|
||||||
|
|
||||||
|
# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
|
||||||
|
"\uA77B" => "F"
|
||||||
|
|
||||||
|
# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
|
||||||
|
"\uA7FB" => "F"
|
||||||
|
|
||||||
|
# F [FULLWIDTH LATIN CAPITAL LETTER F]
|
||||||
|
"\uFF26" => "F"
|
||||||
|
|
||||||
|
# ƒ [LATIN SMALL LETTER F WITH HOOK]
|
||||||
|
"\u0192" => "f"
|
||||||
|
|
||||||
|
# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
|
||||||
|
"\u1D6E" => "f"
|
||||||
|
|
||||||
|
# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
|
||||||
|
"\u1D82" => "f"
|
||||||
|
|
||||||
|
# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
|
||||||
|
"\u1E1F" => "f"
|
||||||
|
|
||||||
|
# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
|
||||||
|
"\u1E9B" => "f"
|
||||||
|
|
||||||
|
# ⓕ [CIRCLED LATIN SMALL LETTER F]
|
||||||
|
"\u24D5" => "f"
|
||||||
|
|
||||||
|
# ꝼ [LATIN SMALL LETTER INSULAR F]
|
||||||
|
"\uA77C" => "f"
|
||||||
|
|
||||||
|
# f [FULLWIDTH LATIN SMALL LETTER F]
|
||||||
|
"\uFF46" => "f"
|
||||||
|
|
||||||
|
# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
|
||||||
|
"\u24A1" => "(f)"
|
||||||
|
|
||||||
|
# ff [LATIN SMALL LIGATURE FF]
|
||||||
|
"\uFB00" => "ff"
|
||||||
|
|
||||||
|
# ffi [LATIN SMALL LIGATURE FFI]
|
||||||
|
"\uFB03" => "ffi"
|
||||||
|
|
||||||
|
# ffl [LATIN SMALL LIGATURE FFL]
|
||||||
|
"\uFB04" => "ffl"
|
||||||
|
|
||||||
|
# fi [LATIN SMALL LIGATURE FI]
|
||||||
|
"\uFB01" => "fi"
|
||||||
|
|
||||||
|
# fl [LATIN SMALL LIGATURE FL]
|
||||||
|
"\uFB02" => "fl"
|
||||||
|
|
||||||
|
# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
|
||||||
|
"\u011C" => "G"
|
||||||
|
|
||||||
|
# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
|
||||||
|
"\u011E" => "G"
|
||||||
|
|
||||||
|
# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
|
||||||
|
"\u0120" => "G"
|
||||||
|
|
||||||
|
# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
|
||||||
|
"\u0122" => "G"
|
||||||
|
|
||||||
|
# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
|
||||||
|
"\u0193" => "G"
|
||||||
|
|
||||||
|
# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
|
||||||
|
"\u01E4" => "G"
|
||||||
|
|
||||||
|
# ǥ [LATIN SMALL LETTER G WITH STROKE]
|
||||||
|
"\u01E5" => "G"
|
||||||
|
|
||||||
|
# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
|
||||||
|
"\u01E6" => "G"
|
||||||
|
|
||||||
|
# ǧ [LATIN SMALL LETTER G WITH CARON]
|
||||||
|
"\u01E7" => "G"
|
||||||
|
|
||||||
|
# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
|
||||||
|
"\u01F4" => "G"
|
||||||
|
|
||||||
|
# ɢ [LATIN LETTER SMALL CAPITAL G]
|
||||||
|
"\u0262" => "G"
|
||||||
|
|
||||||
|
# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
|
||||||
|
"\u029B" => "G"
|
||||||
|
|
||||||
|
# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
|
||||||
|
"\u1E20" => "G"
|
||||||
|
|
||||||
|
# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
|
||||||
|
"\u24BC" => "G"
|
||||||
|
|
||||||
|
# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
|
||||||
|
"\uA77D" => "G"
|
||||||
|
|
||||||
|
# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
|
||||||
|
"\uA77E" => "G"
|
||||||
|
|
||||||
|
# G [FULLWIDTH LATIN CAPITAL LETTER G]
|
||||||
|
"\uFF27" => "G"
|
||||||
|
|
||||||
|
# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
|
||||||
|
"\u011D" => "g"
|
||||||
|
|
||||||
|
# ğ [LATIN SMALL LETTER G WITH BREVE]
|
||||||
|
"\u011F" => "g"
|
||||||
|
|
||||||
|
# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
|
||||||
|
"\u0121" => "g"
|
||||||
|
|
||||||
|
# ģ [LATIN SMALL LETTER G WITH CEDILLA]
|
||||||
|
"\u0123" => "g"
|
||||||
|
|
||||||
|
# ǵ [LATIN SMALL LETTER G WITH ACUTE]
|
||||||
|
"\u01F5" => "g"
|
||||||
|
|
||||||
|
# ɠ [LATIN SMALL LETTER G WITH HOOK]
|
||||||
|
"\u0260" => "g"
|
||||||
|
|
||||||
|
# ɡ [LATIN SMALL LETTER SCRIPT G]
|
||||||
|
"\u0261" => "g"
|
||||||
|
|
||||||
|
# ᵷ [LATIN SMALL LETTER TURNED G]
|
||||||
|
"\u1D77" => "g"
|
||||||
|
|
||||||
|
# ᵹ [LATIN SMALL LETTER INSULAR G]
|
||||||
|
"\u1D79" => "g"
|
||||||
|
|
||||||
|
# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
|
||||||
|
"\u1D83" => "g"
|
||||||
|
|
||||||
|
# ḡ [LATIN SMALL LETTER G WITH MACRON]
|
||||||
|
"\u1E21" => "g"
|
||||||
|
|
||||||
|
# ⓖ [CIRCLED LATIN SMALL LETTER G]
|
||||||
|
"\u24D6" => "g"
|
||||||
|
|
||||||
|
# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
|
||||||
|
"\uA77F" => "g"
|
||||||
|
|
||||||
|
# g [FULLWIDTH LATIN SMALL LETTER G]
|
||||||
|
"\uFF47" => "g"
|
||||||
|
|
||||||
|
# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
|
||||||
|
"\u24A2" => "(g)"
|
||||||
|
|
||||||
|
# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
|
||||||
|
"\u0124" => "H"
|
||||||
|
|
||||||
|
# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
|
||||||
|
"\u0126" => "H"
|
||||||
|
|
||||||
|
# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
|
||||||
|
"\u021E" => "H"
|
||||||
|
|
||||||
|
# ʜ [LATIN LETTER SMALL CAPITAL H]
|
||||||
|
"\u029C" => "H"
|
||||||
|
|
||||||
|
# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
|
||||||
|
"\u1E22" => "H"
|
||||||
|
|
||||||
|
# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
|
||||||
|
"\u1E24" => "H"
|
||||||
|
|
||||||
|
# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
|
||||||
|
"\u1E26" => "H"
|
||||||
|
|
||||||
|
# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
|
||||||
|
"\u1E28" => "H"
|
||||||
|
|
||||||
|
# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
|
||||||
|
"\u1E2A" => "H"
|
||||||
|
|
||||||
|
# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
|
||||||
|
"\u24BD" => "H"
|
||||||
|
|
||||||
|
# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
|
||||||
|
"\u2C67" => "H"
|
||||||
|
|
||||||
|
# Ⱶ [LATIN CAPITAL LETTER HALF H]
|
||||||
|
"\u2C75" => "H"
|
||||||
|
|
||||||
|
# H [FULLWIDTH LATIN CAPITAL LETTER H]
|
||||||
|
"\uFF28" => "H"
|
||||||
|
|
||||||
|
# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
|
||||||
|
"\u0125" => "h"
|
||||||
|
|
||||||
|
# ħ [LATIN SMALL LETTER H WITH STROKE]
|
||||||
|
"\u0127" => "h"
|
||||||
|
|
||||||
|
# ȟ [LATIN SMALL LETTER H WITH CARON]
|
||||||
|
"\u021F" => "h"
|
||||||
|
|
||||||
|
# ɥ [LATIN SMALL LETTER TURNED H]
|
||||||
|
"\u0265" => "h"
|
||||||
|
|
||||||
|
# ɦ [LATIN SMALL LETTER H WITH HOOK]
|
||||||
|
"\u0266" => "h"
|
||||||
|
|
||||||
|
# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
|
||||||
|
"\u02AE" => "h"
|
||||||
|
|
||||||
|
# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
|
||||||
|
"\u02AF" => "h"
|
||||||
|
|
||||||
|
# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
|
||||||
|
"\u1E23" => "h"
|
||||||
|
|
||||||
|
# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
|
||||||
|
"\u1E25" => "h"
|
||||||
|
|
||||||
|
# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
|
||||||
|
"\u1E27" => "h"
|
||||||
|
|
||||||
|
# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
|
||||||
|
"\u1E29" => "h"
|
||||||
|
|
||||||
|
# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
|
||||||
|
"\u1E2B" => "h"
|
||||||
|
|
||||||
|
# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
|
||||||
|
"\u1E96" => "h"
|
||||||
|
|
||||||
|
# ⓗ [CIRCLED LATIN SMALL LETTER H]
|
||||||
|
"\u24D7" => "h"
|
||||||
|
|
||||||
|
# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
|
||||||
|
"\u2C68" => "h"
|
||||||
|
|
||||||
|
# ⱶ [LATIN SMALL LETTER HALF H]
|
||||||
|
"\u2C76" => "h"
|
||||||
|
|
||||||
|
# h [FULLWIDTH LATIN SMALL LETTER H]
|
||||||
|
"\uFF48" => "h"
|
||||||
|
|
||||||
|
# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
|
||||||
|
"\u01F6" => "HV"
|
||||||
|
|
||||||
|
# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
|
||||||
|
"\u24A3" => "(h)"
|
||||||
|
|
||||||
|
# ƕ [LATIN SMALL LETTER HV]
|
||||||
|
"\u0195" => "hv"
|
||||||
|
|
||||||
|
# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
|
||||||
|
"\u00CC" => "I"
|
||||||
|
|
||||||
|
# Í [LATIN CAPITAL LETTER I WITH ACUTE]
|
||||||
|
"\u00CD" => "I"
|
||||||
|
|
||||||
|
# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
|
||||||
|
"\u00CE" => "I"
|
||||||
|
|
||||||
|
# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
|
||||||
|
"\u00CF" => "I"
|
||||||
|
|
||||||
|
# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
|
||||||
|
"\u0128" => "I"
|
||||||
|
|
||||||
|
# Ī [LATIN CAPITAL LETTER I WITH MACRON]
|
||||||
|
"\u012A" => "I"
|
||||||
|
|
||||||
|
# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
|
||||||
|
"\u012C" => "I"
|
||||||
|
|
||||||
|
# Į [LATIN CAPITAL LETTER I WITH OGONEK]
|
||||||
|
"\u012E" => "I"
|
||||||
|
|
||||||
|
# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
|
||||||
|
"\u0130" => "I"
|
||||||
|
|
||||||
|
# Ɩ [LATIN CAPITAL LETTER IOTA]
|
||||||
|
"\u0196" => "I"
|
||||||
|
|
||||||
|
# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
|
||||||
|
"\u0197" => "I"
|
||||||
|
|
||||||
|
# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
|
||||||
|
"\u01CF" => "I"
|
||||||
|
|
||||||
|
# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
|
||||||
|
"\u0208" => "I"
|
||||||
|
|
||||||
|
# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
|
||||||
|
"\u020A" => "I"
|
||||||
|
|
||||||
|
# ɪ [LATIN LETTER SMALL CAPITAL I]
|
||||||
|
"\u026A" => "I"
|
||||||
|
|
||||||
|
# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
|
||||||
|
"\u1D7B" => "I"
|
||||||
|
|
||||||
|
# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
|
||||||
|
"\u1E2C" => "I"
|
||||||
|
|
||||||
|
# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
|
||||||
|
"\u1E2E" => "I"
|
||||||
|
|
||||||
|
# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
|
||||||
|
"\u1EC8" => "I"
|
||||||
|
|
||||||
|
# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
|
||||||
|
"\u1ECA" => "I"
|
||||||
|
|
||||||
|
# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
|
||||||
|
"\u24BE" => "I"
|
||||||
|
|
||||||
|
# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
|
||||||
|
"\uA7FE" => "I"
|
||||||
|
|
||||||
|
# I [FULLWIDTH LATIN CAPITAL LETTER I]
|
||||||
|
"\uFF29" => "I"
|
||||||
|
|
||||||
|
# ì [LATIN SMALL LETTER I WITH GRAVE]
|
||||||
|
"\u00EC" => "i"
|
||||||
|
|
||||||
|
# í [LATIN SMALL LETTER I WITH ACUTE]
|
||||||
|
"\u00ED" => "i"
|
||||||
|
|
||||||
|
# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
|
||||||
|
"\u00EE" => "i"
|
||||||
|
|
||||||
|
# ï [LATIN SMALL LETTER I WITH DIAERESIS]
|
||||||
|
"\u00EF" => "i"
|
||||||
|
|
||||||
|
# ĩ [LATIN SMALL LETTER I WITH TILDE]
|
||||||
|
"\u0129" => "i"
|
||||||
|
|
||||||
|
# ī [LATIN SMALL LETTER I WITH MACRON]
|
||||||
|
"\u012B" => "i"
|
||||||
|
|
||||||
|
# ĭ [LATIN SMALL LETTER I WITH BREVE]
|
||||||
|
"\u012D" => "i"
|
||||||
|
|
||||||
|
# į [LATIN SMALL LETTER I WITH OGONEK]
|
||||||
|
"\u012F" => "i"
|
||||||
|
|
||||||
|
# ı [LATIN SMALL LETTER DOTLESS I]
|
||||||
|
"\u0131" => "i"
|
||||||
|
|
||||||
|
# ǐ [LATIN SMALL LETTER I WITH CARON]
|
||||||
|
"\u01D0" => "i"
|
||||||
|
|
||||||
|
# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
|
||||||
|
"\u0209" => "i"
|
||||||
|
|
||||||
|
# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
|
||||||
|
"\u020B" => "i"
|
||||||
|
|
||||||
|
# ɨ [LATIN SMALL LETTER I WITH STROKE]
|
||||||
|
"\u0268" => "i"
|
||||||
|
|
||||||
|
# ᴉ [LATIN SMALL LETTER TURNED I]
|
||||||
|
"\u1D09" => "i"
|
||||||
|
|
||||||
|
# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
|
||||||
|
"\u1D62" => "i"
|
||||||
|
|
||||||
|
# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
|
||||||
|
"\u1D7C" => "i"
|
||||||
|
|
||||||
|
# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
|
||||||
|
"\u1D96" => "i"
|
||||||
|
|
||||||
|
# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
|
||||||
|
"\u1E2D" => "i"
|
||||||
|
|
||||||
|
# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
|
||||||
|
"\u1E2F" => "i"
|
||||||
|
|
||||||
|
# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
|
||||||
|
"\u1EC9" => "i"
|
||||||
|
|
||||||
|
# ị [LATIN SMALL LETTER I WITH DOT BELOW]
|
||||||
|
"\u1ECB" => "i"
|
||||||
|
|
||||||
|
# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
|
||||||
|
"\u2071" => "i"
|
||||||
|
|
||||||
|
# ⓘ [CIRCLED LATIN SMALL LETTER I]
|
||||||
|
"\u24D8" => "i"
|
||||||
|
|
||||||
|
# i [FULLWIDTH LATIN SMALL LETTER I]
|
||||||
|
"\uFF49" => "i"
|
||||||
|
|
||||||
|
# IJ [LATIN CAPITAL LIGATURE IJ]
|
||||||
|
"\u0132" => "IJ"
|
||||||
|
|
||||||
|
# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
|
||||||
|
"\u24A4" => "(i)"
|
||||||
|
|
||||||
|
# ij [LATIN SMALL LIGATURE IJ]
|
||||||
|
"\u0133" => "ij"
|
||||||
|
|
||||||
|
# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
|
||||||
|
"\u0134" => "J"
|
||||||
|
|
||||||
|
# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
|
||||||
|
"\u0248" => "J"
|
||||||
|
|
||||||
|
# ᴊ [LATIN LETTER SMALL CAPITAL J]
|
||||||
|
"\u1D0A" => "J"
|
||||||
|
|
||||||
|
# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
|
||||||
|
"\u24BF" => "J"
|
||||||
|
|
||||||
|
# J [FULLWIDTH LATIN CAPITAL LETTER J]
|
||||||
|
"\uFF2A" => "J"
|
||||||
|
|
||||||
|
# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
|
||||||
|
"\u0135" => "j"
|
||||||
|
|
||||||
|
# ǰ [LATIN SMALL LETTER J WITH CARON]
|
||||||
|
"\u01F0" => "j"
|
||||||
|
|
||||||
|
# ȷ [LATIN SMALL LETTER DOTLESS J]
|
||||||
|
"\u0237" => "j"
|
||||||
|
|
||||||
|
# ɉ [LATIN SMALL LETTER J WITH STROKE]
|
||||||
|
"\u0249" => "j"
|
||||||
|
|
||||||
|
# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
|
||||||
|
"\u025F" => "j"
|
||||||
|
|
||||||
|
# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
|
||||||
|
"\u0284" => "j"
|
||||||
|
|
||||||
|
# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
|
||||||
|
"\u029D" => "j"
|
||||||
|
|
||||||
|
# ⓙ [CIRCLED LATIN SMALL LETTER J]
|
||||||
|
"\u24D9" => "j"
|
||||||
|
|
||||||
|
# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
|
||||||
|
"\u2C7C" => "j"
|
||||||
|
|
||||||
|
# j [FULLWIDTH LATIN SMALL LETTER J]
|
||||||
|
"\uFF4A" => "j"
|
||||||
|
|
||||||
|
# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
|
||||||
|
"\u24A5" => "(j)"
|
||||||
|
|
||||||
|
# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
|
||||||
|
"\u0136" => "K"
|
||||||
|
|
||||||
|
# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
|
||||||
|
"\u0198" => "K"
|
||||||
|
|
||||||
|
# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
|
||||||
|
"\u01E8" => "K"
|
||||||
|
|
||||||
|
# ᴋ [LATIN LETTER SMALL CAPITAL K]
|
||||||
|
"\u1D0B" => "K"
|
||||||
|
|
||||||
|
# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
|
||||||
|
"\u1E30" => "K"
|
||||||
|
|
||||||
|
# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
|
||||||
|
"\u1E32" => "K"
|
||||||
|
|
||||||
|
# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
|
||||||
|
"\u1E34" => "K"
|
||||||
|
|
||||||
|
# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
|
||||||
|
"\u24C0" => "K"
|
||||||
|
|
||||||
|
# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
|
||||||
|
"\u2C69" => "K"
|
||||||
|
|
||||||
|
# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
|
||||||
|
"\uA740" => "K"
|
||||||
|
|
||||||
|
# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
|
||||||
|
"\uA742" => "K"
|
||||||
|
|
||||||
|
# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
|
||||||
|
"\uA744" => "K"
|
||||||
|
|
||||||
|
# K [FULLWIDTH LATIN CAPITAL LETTER K]
|
||||||
|
"\uFF2B" => "K"
|
||||||
|
|
||||||
|
# ķ [LATIN SMALL LETTER K WITH CEDILLA]
|
||||||
|
"\u0137" => "k"
|
||||||
|
|
||||||
|
# ƙ [LATIN SMALL LETTER K WITH HOOK]
|
||||||
|
"\u0199" => "k"
|
||||||
|
|
||||||
|
# ǩ [LATIN SMALL LETTER K WITH CARON]
|
||||||
|
"\u01E9" => "k"
|
||||||
|
|
||||||
|
# ʞ [LATIN SMALL LETTER TURNED K]
|
||||||
|
"\u029E" => "k"
|
||||||
|
|
||||||
|
# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
|
||||||
|
"\u1D84" => "k"
|
||||||
|
|
||||||
|
# ḱ [LATIN SMALL LETTER K WITH ACUTE]
|
||||||
|
"\u1E31" => "k"
|
||||||
|
|
||||||
|
# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
|
||||||
|
"\u1E33" => "k"
|
||||||
|
|
||||||
|
# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
|
||||||
|
"\u1E35" => "k"
|
||||||
|
|
||||||
|
# ⓚ [CIRCLED LATIN SMALL LETTER K]
|
||||||
|
"\u24DA" => "k"
|
||||||
|
|
||||||
|
# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
|
||||||
|
"\u2C6A" => "k"
|
||||||
|
|
||||||
|
# ꝁ [LATIN SMALL LETTER K WITH STROKE]
|
||||||
|
"\uA741" => "k"
|
||||||
|
|
||||||
|
# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
|
||||||
|
"\uA743" => "k"
|
||||||
|
|
||||||
|
# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
|
||||||
|
"\uA745" => "k"
|
||||||
|
|
||||||
|
# k [FULLWIDTH LATIN SMALL LETTER K]
|
||||||
|
"\uFF4B" => "k"
|
||||||
|
|
||||||
|
# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
|
||||||
|
"\u24A6" => "(k)"
|
||||||
|
|
||||||
|
# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
|
||||||
|
"\u0139" => "L"
|
||||||
|
|
||||||
|
# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
|
||||||
|
"\u013B" => "L"
|
||||||
|
|
||||||
|
# Ľ [LATIN CAPITAL LETTER L WITH CARON]
|
||||||
|
"\u013D" => "L"
|
||||||
|
|
||||||
|
# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
|
||||||
|
"\u013F" => "L"
|
||||||
|
|
||||||
|
# Ł [LATIN CAPITAL LETTER L WITH STROKE]
|
||||||
|
"\u0141" => "L"
|
||||||
|
|
||||||
|
# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
|
||||||
|
"\u023D" => "L"
|
||||||
|
|
||||||
|
# ʟ [LATIN LETTER SMALL CAPITAL L]
|
||||||
|
"\u029F" => "L"
|
||||||
|
|
||||||
|
# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
|
||||||
|
"\u1D0C" => "L"
|
||||||
|
|
||||||
|
# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
|
||||||
|
"\u1E36" => "L"
|
||||||
|
|
||||||
|
# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
|
||||||
|
"\u1E38" => "L"
|
||||||
|
|
||||||
|
# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
|
||||||
|
"\u1E3A" => "L"
|
||||||
|
|
||||||
|
# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E3C" => "L"
|
||||||
|
|
||||||
|
# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
|
||||||
|
"\u24C1" => "L"
|
||||||
|
|
||||||
|
# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
|
||||||
|
"\u2C60" => "L"
|
||||||
|
|
||||||
|
# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
|
||||||
|
"\u2C62" => "L"
|
||||||
|
|
||||||
|
# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
|
||||||
|
"\uA746" => "L"
|
||||||
|
|
||||||
|
# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
|
||||||
|
"\uA748" => "L"
|
||||||
|
|
||||||
|
# Ꞁ [LATIN CAPITAL LETTER TURNED L]
|
||||||
|
"\uA780" => "L"
|
||||||
|
|
||||||
|
# L [FULLWIDTH LATIN CAPITAL LETTER L]
|
||||||
|
"\uFF2C" => "L"
|
||||||
|
|
||||||
|
# ĺ [LATIN SMALL LETTER L WITH ACUTE]
|
||||||
|
"\u013A" => "l"
|
||||||
|
|
||||||
|
# ļ [LATIN SMALL LETTER L WITH CEDILLA]
|
||||||
|
"\u013C" => "l"
|
||||||
|
|
||||||
|
# ľ [LATIN SMALL LETTER L WITH CARON]
|
||||||
|
"\u013E" => "l"
|
||||||
|
|
||||||
|
# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
|
||||||
|
"\u0140" => "l"
|
||||||
|
|
||||||
|
# ł [LATIN SMALL LETTER L WITH STROKE]
|
||||||
|
"\u0142" => "l"
|
||||||
|
|
||||||
|
# ƚ [LATIN SMALL LETTER L WITH BAR]
|
||||||
|
"\u019A" => "l"
|
||||||
|
|
||||||
|
# ȴ [LATIN SMALL LETTER L WITH CURL]
|
||||||
|
"\u0234" => "l"
|
||||||
|
|
||||||
|
# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
|
||||||
|
"\u026B" => "l"
|
||||||
|
|
||||||
|
# ɬ [LATIN SMALL LETTER L WITH BELT]
|
||||||
|
"\u026C" => "l"
|
||||||
|
|
||||||
|
# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
|
||||||
|
"\u026D" => "l"
|
||||||
|
|
||||||
|
# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
|
||||||
|
"\u1D85" => "l"
|
||||||
|
|
||||||
|
# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
|
||||||
|
"\u1E37" => "l"
|
||||||
|
|
||||||
|
# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
|
||||||
|
"\u1E39" => "l"
|
||||||
|
|
||||||
|
# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
|
||||||
|
"\u1E3B" => "l"
|
||||||
|
|
||||||
|
# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E3D" => "l"
|
||||||
|
|
||||||
|
# ⓛ [CIRCLED LATIN SMALL LETTER L]
|
||||||
|
"\u24DB" => "l"
|
||||||
|
|
||||||
|
# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
|
||||||
|
"\u2C61" => "l"
|
||||||
|
|
||||||
|
# ꝇ [LATIN SMALL LETTER BROKEN L]
|
||||||
|
"\uA747" => "l"
|
||||||
|
|
||||||
|
# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
|
||||||
|
"\uA749" => "l"
|
||||||
|
|
||||||
|
# ꞁ [LATIN SMALL LETTER TURNED L]
|
||||||
|
"\uA781" => "l"
|
||||||
|
|
||||||
|
# l [FULLWIDTH LATIN SMALL LETTER L]
|
||||||
|
"\uFF4C" => "l"
|
||||||
|
|
||||||
|
# LJ [LATIN CAPITAL LETTER LJ]
|
||||||
|
"\u01C7" => "LJ"
|
||||||
|
|
||||||
|
# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
|
||||||
|
"\u1EFA" => "LL"
|
||||||
|
|
||||||
|
# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
|
||||||
|
"\u01C8" => "Lj"
|
||||||
|
|
||||||
|
# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
|
||||||
|
"\u24A7" => "(l)"
|
||||||
|
|
||||||
|
# lj [LATIN SMALL LETTER LJ]
|
||||||
|
"\u01C9" => "lj"
|
||||||
|
|
||||||
|
# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
|
||||||
|
"\u1EFB" => "ll"
|
||||||
|
|
||||||
|
# ʪ [LATIN SMALL LETTER LS DIGRAPH]
|
||||||
|
"\u02AA" => "ls"
|
||||||
|
|
||||||
|
# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
|
||||||
|
"\u02AB" => "lz"
|
||||||
|
|
||||||
|
# Ɯ [LATIN CAPITAL LETTER TURNED M]
|
||||||
|
"\u019C" => "M"
|
||||||
|
|
||||||
|
# ᴍ [LATIN LETTER SMALL CAPITAL M]
|
||||||
|
"\u1D0D" => "M"
|
||||||
|
|
||||||
|
# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
|
||||||
|
"\u1E3E" => "M"
|
||||||
|
|
||||||
|
# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
|
||||||
|
"\u1E40" => "M"
|
||||||
|
|
||||||
|
# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
|
||||||
|
"\u1E42" => "M"
|
||||||
|
|
||||||
|
# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
|
||||||
|
"\u24C2" => "M"
|
||||||
|
|
||||||
|
# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
|
||||||
|
"\u2C6E" => "M"
|
||||||
|
|
||||||
|
# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
|
||||||
|
"\uA7FD" => "M"
|
||||||
|
|
||||||
|
# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
|
||||||
|
"\uA7FF" => "M"
|
||||||
|
|
||||||
|
# M [FULLWIDTH LATIN CAPITAL LETTER M]
|
||||||
|
"\uFF2D" => "M"
|
||||||
|
|
||||||
|
# ɯ [LATIN SMALL LETTER TURNED M]
|
||||||
|
"\u026F" => "m"
|
||||||
|
|
||||||
|
# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
|
||||||
|
"\u0270" => "m"
|
||||||
|
|
||||||
|
# ɱ [LATIN SMALL LETTER M WITH HOOK]
|
||||||
|
"\u0271" => "m"
|
||||||
|
|
||||||
|
# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
|
||||||
|
"\u1D6F" => "m"
|
||||||
|
|
||||||
|
# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
|
||||||
|
"\u1D86" => "m"
|
||||||
|
|
||||||
|
# ḿ [LATIN SMALL LETTER M WITH ACUTE]
|
||||||
|
"\u1E3F" => "m"
|
||||||
|
|
||||||
|
# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
|
||||||
|
"\u1E41" => "m"
|
||||||
|
|
||||||
|
# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
|
||||||
|
"\u1E43" => "m"
|
||||||
|
|
||||||
|
# ⓜ [CIRCLED LATIN SMALL LETTER M]
|
||||||
|
"\u24DC" => "m"
|
||||||
|
|
||||||
|
# m [FULLWIDTH LATIN SMALL LETTER M]
|
||||||
|
"\uFF4D" => "m"
|
||||||
|
|
||||||
|
# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
|
||||||
|
"\u24A8" => "(m)"
|
||||||
|
|
||||||
|
# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
|
||||||
|
"\u00D1" => "N"
|
||||||
|
|
||||||
|
# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
|
||||||
|
"\u0143" => "N"
|
||||||
|
|
||||||
|
# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
|
||||||
|
"\u0145" => "N"
|
||||||
|
|
||||||
|
# Ň [LATIN CAPITAL LETTER N WITH CARON]
|
||||||
|
"\u0147" => "N"
|
||||||
|
|
||||||
|
# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
|
||||||
|
"\u014A" => "N"
|
||||||
|
|
||||||
|
# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
|
||||||
|
"\u019D" => "N"
|
||||||
|
|
||||||
|
# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
|
||||||
|
"\u01F8" => "N"
|
||||||
|
|
||||||
|
# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
|
||||||
|
"\u0220" => "N"
|
||||||
|
|
||||||
|
# ɴ [LATIN LETTER SMALL CAPITAL N]
|
||||||
|
"\u0274" => "N"
|
||||||
|
|
||||||
|
# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
|
||||||
|
"\u1D0E" => "N"
|
||||||
|
|
||||||
|
# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
|
||||||
|
"\u1E44" => "N"
|
||||||
|
|
||||||
|
# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
|
||||||
|
"\u1E46" => "N"
|
||||||
|
|
||||||
|
# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
|
||||||
|
"\u1E48" => "N"
|
||||||
|
|
||||||
|
# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E4A" => "N"
|
||||||
|
|
||||||
|
# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
|
||||||
|
"\u24C3" => "N"
|
||||||
|
|
||||||
|
# N [FULLWIDTH LATIN CAPITAL LETTER N]
|
||||||
|
"\uFF2E" => "N"
|
||||||
|
|
||||||
|
# ñ [LATIN SMALL LETTER N WITH TILDE]
|
||||||
|
"\u00F1" => "n"
|
||||||
|
|
||||||
|
# ń [LATIN SMALL LETTER N WITH ACUTE]
|
||||||
|
"\u0144" => "n"
|
||||||
|
|
||||||
|
# ņ [LATIN SMALL LETTER N WITH CEDILLA]
|
||||||
|
"\u0146" => "n"
|
||||||
|
|
||||||
|
# ň [LATIN SMALL LETTER N WITH CARON]
|
||||||
|
"\u0148" => "n"
|
||||||
|
|
||||||
|
# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
|
||||||
|
"\u0149" => "n"
|
||||||
|
|
||||||
|
# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
|
||||||
|
"\u014B" => "n"
|
||||||
|
|
||||||
|
# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
|
||||||
|
"\u019E" => "n"
|
||||||
|
|
||||||
|
# ǹ [LATIN SMALL LETTER N WITH GRAVE]
|
||||||
|
"\u01F9" => "n"
|
||||||
|
|
||||||
|
# ȵ [LATIN SMALL LETTER N WITH CURL]
|
||||||
|
"\u0235" => "n"
|
||||||
|
|
||||||
|
# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
|
||||||
|
"\u0272" => "n"
|
||||||
|
|
||||||
|
# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
|
||||||
|
"\u0273" => "n"
|
||||||
|
|
||||||
|
# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
|
||||||
|
"\u1D70" => "n"
|
||||||
|
|
||||||
|
# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
|
||||||
|
"\u1D87" => "n"
|
||||||
|
|
||||||
|
# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
|
||||||
|
"\u1E45" => "n"
|
||||||
|
|
||||||
|
# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
|
||||||
|
"\u1E47" => "n"
|
||||||
|
|
||||||
|
# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
|
||||||
|
"\u1E49" => "n"
|
||||||
|
|
||||||
|
# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E4B" => "n"
|
||||||
|
|
||||||
|
# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
|
||||||
|
"\u207F" => "n"
|
||||||
|
|
||||||
|
# ⓝ [CIRCLED LATIN SMALL LETTER N]
|
||||||
|
"\u24DD" => "n"
|
||||||
|
|
||||||
|
# n [FULLWIDTH LATIN SMALL LETTER N]
|
||||||
|
"\uFF4E" => "n"
|
||||||
|
|
||||||
|
# NJ [LATIN CAPITAL LETTER NJ]
|
||||||
|
"\u01CA" => "NJ"
|
||||||
|
|
||||||
|
# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
|
||||||
|
"\u01CB" => "Nj"
|
||||||
|
|
||||||
|
# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
|
||||||
|
"\u24A9" => "(n)"
|
||||||
|
|
||||||
|
# nj [LATIN SMALL LETTER NJ]
|
||||||
|
"\u01CC" => "nj"
|
||||||
|
|
||||||
|
# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
|
||||||
|
"\u00D2" => "O"
|
||||||
|
|
||||||
|
# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
|
||||||
|
"\u00D3" => "O"
|
||||||
|
|
||||||
|
# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
|
||||||
|
"\u00D4" => "O"
|
||||||
|
|
||||||
|
# Õ [LATIN CAPITAL LETTER O WITH TILDE]
|
||||||
|
"\u00D5" => "O"
|
||||||
|
|
||||||
|
# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
|
||||||
|
"\u00D6" => "O"
|
||||||
|
|
||||||
|
# Ø [LATIN CAPITAL LETTER O WITH STROKE]
|
||||||
|
"\u00D8" => "O"
|
||||||
|
|
||||||
|
# Ō [LATIN CAPITAL LETTER O WITH MACRON]
|
||||||
|
"\u014C" => "O"
|
||||||
|
|
||||||
|
# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
|
||||||
|
"\u014E" => "O"
|
||||||
|
|
||||||
|
# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
|
||||||
|
"\u0150" => "O"
|
||||||
|
|
||||||
|
# Ɔ [LATIN CAPITAL LETTER OPEN O]
|
||||||
|
"\u0186" => "O"
|
||||||
|
|
||||||
|
# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
|
||||||
|
"\u019F" => "O"
|
||||||
|
|
||||||
|
# Ơ [LATIN CAPITAL LETTER O WITH HORN]
|
||||||
|
"\u01A0" => "O"
|
||||||
|
|
||||||
|
# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
|
||||||
|
"\u01D1" => "O"
|
||||||
|
|
||||||
|
# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
|
||||||
|
"\u01EA" => "O"
|
||||||
|
|
||||||
|
# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
|
||||||
|
"\u01EC" => "O"
|
||||||
|
|
||||||
|
# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
|
||||||
|
"\u01FE" => "O"
|
||||||
|
|
||||||
|
# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
|
||||||
|
"\u020C" => "O"
|
||||||
|
|
||||||
|
# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
|
||||||
|
"\u020E" => "O"
|
||||||
|
|
||||||
|
# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
|
||||||
|
"\u022A" => "O"
|
||||||
|
|
||||||
|
# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
|
||||||
|
"\u022C" => "O"
|
||||||
|
|
||||||
|
# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
|
||||||
|
"\u022E" => "O"
|
||||||
|
|
||||||
|
# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
|
||||||
|
"\u0230" => "O"
|
||||||
|
|
||||||
|
# ᴏ [LATIN LETTER SMALL CAPITAL O]
|
||||||
|
"\u1D0F" => "O"
|
||||||
|
|
||||||
|
# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
|
||||||
|
"\u1D10" => "O"
|
||||||
|
|
||||||
|
# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
|
||||||
|
"\u1E4C" => "O"
|
||||||
|
|
||||||
|
# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
|
||||||
|
"\u1E4E" => "O"
|
||||||
|
|
||||||
|
# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
|
||||||
|
"\u1E50" => "O"
|
||||||
|
|
||||||
|
# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
|
||||||
|
"\u1E52" => "O"
|
||||||
|
|
||||||
|
# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
|
||||||
|
"\u1ECC" => "O"
|
||||||
|
|
||||||
|
# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
|
||||||
|
"\u1ECE" => "O"
|
||||||
|
|
||||||
|
# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1ED0" => "O"
|
||||||
|
|
||||||
|
# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1ED2" => "O"
|
||||||
|
|
||||||
|
# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1ED4" => "O"
|
||||||
|
|
||||||
|
# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1ED6" => "O"
|
||||||
|
|
||||||
|
# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1ED8" => "O"
|
||||||
|
|
||||||
|
# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
|
||||||
|
"\u1EDA" => "O"
|
||||||
|
|
||||||
|
# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
|
||||||
|
"\u1EDC" => "O"
|
||||||
|
|
||||||
|
# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
|
||||||
|
"\u1EDE" => "O"
|
||||||
|
|
||||||
|
# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
|
||||||
|
"\u1EE0" => "O"
|
||||||
|
|
||||||
|
# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
|
||||||
|
"\u1EE2" => "O"
|
||||||
|
|
||||||
|
# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
|
||||||
|
"\u24C4" => "O"
|
||||||
|
|
||||||
|
# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
|
||||||
|
"\uA74A" => "O"
|
||||||
|
|
||||||
|
# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
|
||||||
|
"\uA74C" => "O"
|
||||||
|
|
||||||
|
# O [FULLWIDTH LATIN CAPITAL LETTER O]
|
||||||
|
"\uFF2F" => "O"
|
||||||
|
|
||||||
|
# ò [LATIN SMALL LETTER O WITH GRAVE]
|
||||||
|
"\u00F2" => "o"
|
||||||
|
|
||||||
|
# ó [LATIN SMALL LETTER O WITH ACUTE]
|
||||||
|
"\u00F3" => "o"
|
||||||
|
|
||||||
|
# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
|
||||||
|
"\u00F4" => "o"
|
||||||
|
|
||||||
|
# õ [LATIN SMALL LETTER O WITH TILDE]
|
||||||
|
"\u00F5" => "o"
|
||||||
|
|
||||||
|
# ö [LATIN SMALL LETTER O WITH DIAERESIS]
|
||||||
|
"\u00F6" => "o"
|
||||||
|
|
||||||
|
# ø [LATIN SMALL LETTER O WITH STROKE]
|
||||||
|
"\u00F8" => "o"
|
||||||
|
|
||||||
|
# ō [LATIN SMALL LETTER O WITH MACRON]
|
||||||
|
"\u014D" => "o"
|
||||||
|
|
||||||
|
# ŏ [LATIN SMALL LETTER O WITH BREVE]
|
||||||
|
"\u014F" => "o"
|
||||||
|
|
||||||
|
# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
|
||||||
|
"\u0151" => "o"
|
||||||
|
|
||||||
|
# ơ [LATIN SMALL LETTER O WITH HORN]
|
||||||
|
"\u01A1" => "o"
|
||||||
|
|
||||||
|
# ǒ [LATIN SMALL LETTER O WITH CARON]
|
||||||
|
"\u01D2" => "o"
|
||||||
|
|
||||||
|
# ǫ [LATIN SMALL LETTER O WITH OGONEK]
|
||||||
|
"\u01EB" => "o"
|
||||||
|
|
||||||
|
# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
|
||||||
|
"\u01ED" => "o"
|
||||||
|
|
||||||
|
# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
|
||||||
|
"\u01FF" => "o"
|
||||||
|
|
||||||
|
# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
|
||||||
|
"\u020D" => "o"
|
||||||
|
|
||||||
|
# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
|
||||||
|
"\u020F" => "o"
|
||||||
|
|
||||||
|
# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
|
||||||
|
"\u022B" => "o"
|
||||||
|
|
||||||
|
# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
|
||||||
|
"\u022D" => "o"
|
||||||
|
|
||||||
|
# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
|
||||||
|
"\u022F" => "o"
|
||||||
|
|
||||||
|
# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
|
||||||
|
"\u0231" => "o"
|
||||||
|
|
||||||
|
# ɔ [LATIN SMALL LETTER OPEN O]
|
||||||
|
"\u0254" => "o"
|
||||||
|
|
||||||
|
# ɵ [LATIN SMALL LETTER BARRED O]
|
||||||
|
"\u0275" => "o"
|
||||||
|
|
||||||
|
# ᴖ [LATIN SMALL LETTER TOP HALF O]
|
||||||
|
"\u1D16" => "o"
|
||||||
|
|
||||||
|
# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
|
||||||
|
"\u1D17" => "o"
|
||||||
|
|
||||||
|
# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
|
||||||
|
"\u1D97" => "o"
|
||||||
|
|
||||||
|
# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
|
||||||
|
"\u1E4D" => "o"
|
||||||
|
|
||||||
|
# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
|
||||||
|
"\u1E4F" => "o"
|
||||||
|
|
||||||
|
# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
|
||||||
|
"\u1E51" => "o"
|
||||||
|
|
||||||
|
# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
|
||||||
|
"\u1E53" => "o"
|
||||||
|
|
||||||
|
# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
|
||||||
|
"\u1ECD" => "o"
|
||||||
|
|
||||||
|
# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
|
||||||
|
"\u1ECF" => "o"
|
||||||
|
|
||||||
|
# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
|
||||||
|
"\u1ED1" => "o"
|
||||||
|
|
||||||
|
# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
|
||||||
|
"\u1ED3" => "o"
|
||||||
|
|
||||||
|
# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||||
|
"\u1ED5" => "o"
|
||||||
|
|
||||||
|
# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
|
||||||
|
"\u1ED7" => "o"
|
||||||
|
|
||||||
|
# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
|
||||||
|
"\u1ED9" => "o"
|
||||||
|
|
||||||
|
# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
|
||||||
|
"\u1EDB" => "o"
|
||||||
|
|
||||||
|
# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
|
||||||
|
"\u1EDD" => "o"
|
||||||
|
|
||||||
|
# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
|
||||||
|
"\u1EDF" => "o"
|
||||||
|
|
||||||
|
# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
|
||||||
|
"\u1EE1" => "o"
|
||||||
|
|
||||||
|
# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
|
||||||
|
"\u1EE3" => "o"
|
||||||
|
|
||||||
|
# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
|
||||||
|
"\u2092" => "o"
|
||||||
|
|
||||||
|
# ⓞ [CIRCLED LATIN SMALL LETTER O]
|
||||||
|
"\u24DE" => "o"
|
||||||
|
|
||||||
|
# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
|
||||||
|
"\u2C7A" => "o"
|
||||||
|
|
||||||
|
# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
|
||||||
|
"\uA74B" => "o"
|
||||||
|
|
||||||
|
# ꝍ [LATIN SMALL LETTER O WITH LOOP]
|
||||||
|
"\uA74D" => "o"
|
||||||
|
|
||||||
|
# o [FULLWIDTH LATIN SMALL LETTER O]
|
||||||
|
"\uFF4F" => "o"
|
||||||
|
|
||||||
|
# Œ [LATIN CAPITAL LIGATURE OE]
|
||||||
|
"\u0152" => "OE"
|
||||||
|
|
||||||
|
# ɶ [LATIN LETTER SMALL CAPITAL OE]
|
||||||
|
"\u0276" => "OE"
|
||||||
|
|
||||||
|
# Ꝏ [LATIN CAPITAL LETTER OO]
|
||||||
|
"\uA74E" => "OO"
|
||||||
|
|
||||||
|
# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
|
||||||
|
"\u0222" => "OU"
|
||||||
|
|
||||||
|
# ᴕ [LATIN LETTER SMALL CAPITAL OU]
|
||||||
|
"\u1D15" => "OU"
|
||||||
|
|
||||||
|
# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
|
||||||
|
"\u24AA" => "(o)"
|
||||||
|
|
||||||
|
# œ [LATIN SMALL LIGATURE OE]
|
||||||
|
"\u0153" => "oe"
|
||||||
|
|
||||||
|
# ᴔ [LATIN SMALL LETTER TURNED OE]
|
||||||
|
"\u1D14" => "oe"
|
||||||
|
|
||||||
|
# ꝏ [LATIN SMALL LETTER OO]
|
||||||
|
"\uA74F" => "oo"
|
||||||
|
|
||||||
|
# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
|
||||||
|
"\u0223" => "ou"
|
||||||
|
|
||||||
|
# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
|
||||||
|
"\u01A4" => "P"
|
||||||
|
|
||||||
|
# ᴘ [LATIN LETTER SMALL CAPITAL P]
|
||||||
|
"\u1D18" => "P"
|
||||||
|
|
||||||
|
# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
|
||||||
|
"\u1E54" => "P"
|
||||||
|
|
||||||
|
# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
|
||||||
|
"\u1E56" => "P"
|
||||||
|
|
||||||
|
# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
|
||||||
|
"\u24C5" => "P"
|
||||||
|
|
||||||
|
# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
|
||||||
|
"\u2C63" => "P"
|
||||||
|
|
||||||
|
# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA750" => "P"
|
||||||
|
|
||||||
|
# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
|
||||||
|
"\uA752" => "P"
|
||||||
|
|
||||||
|
# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
|
||||||
|
"\uA754" => "P"
|
||||||
|
|
||||||
|
# P [FULLWIDTH LATIN CAPITAL LETTER P]
|
||||||
|
"\uFF30" => "P"
|
||||||
|
|
||||||
|
# ƥ [LATIN SMALL LETTER P WITH HOOK]
|
||||||
|
"\u01A5" => "p"
|
||||||
|
|
||||||
|
# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
|
||||||
|
"\u1D71" => "p"
|
||||||
|
|
||||||
|
# ᵽ [LATIN SMALL LETTER P WITH STROKE]
|
||||||
|
"\u1D7D" => "p"
|
||||||
|
|
||||||
|
# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
|
||||||
|
"\u1D88" => "p"
|
||||||
|
|
||||||
|
# ṕ [LATIN SMALL LETTER P WITH ACUTE]
|
||||||
|
"\u1E55" => "p"
|
||||||
|
|
||||||
|
# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
|
||||||
|
"\u1E57" => "p"
|
||||||
|
|
||||||
|
# ⓟ [CIRCLED LATIN SMALL LETTER P]
|
||||||
|
"\u24DF" => "p"
|
||||||
|
|
||||||
|
# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA751" => "p"
|
||||||
|
|
||||||
|
# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
|
||||||
|
"\uA753" => "p"
|
||||||
|
|
||||||
|
# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
|
||||||
|
"\uA755" => "p"
|
||||||
|
|
||||||
|
# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
|
||||||
|
"\uA7FC" => "p"
|
||||||
|
|
||||||
|
# p [FULLWIDTH LATIN SMALL LETTER P]
|
||||||
|
"\uFF50" => "p"
|
||||||
|
|
||||||
|
# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
|
||||||
|
"\u24AB" => "(p)"
|
||||||
|
|
||||||
|
# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
|
||||||
|
"\u024A" => "Q"
|
||||||
|
|
||||||
|
# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
|
||||||
|
"\u24C6" => "Q"
|
||||||
|
|
||||||
|
# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA756" => "Q"
|
||||||
|
|
||||||
|
# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
|
||||||
|
"\uA758" => "Q"
|
||||||
|
|
||||||
|
# Q [FULLWIDTH LATIN CAPITAL LETTER Q]
|
||||||
|
"\uFF31" => "Q"
|
||||||
|
|
||||||
|
# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
|
||||||
|
"\u0138" => "q"
|
||||||
|
|
||||||
|
# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
|
||||||
|
"\u024B" => "q"
|
||||||
|
|
||||||
|
# ʠ [LATIN SMALL LETTER Q WITH HOOK]
|
||||||
|
"\u02A0" => "q"
|
||||||
|
|
||||||
|
# ⓠ [CIRCLED LATIN SMALL LETTER Q]
|
||||||
|
"\u24E0" => "q"
|
||||||
|
|
||||||
|
# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA757" => "q"
|
||||||
|
|
||||||
|
# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
|
||||||
|
"\uA759" => "q"
|
||||||
|
|
||||||
|
# q [FULLWIDTH LATIN SMALL LETTER Q]
|
||||||
|
"\uFF51" => "q"
|
||||||
|
|
||||||
|
# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
|
||||||
|
"\u24AC" => "(q)"
|
||||||
|
|
||||||
|
# ȹ [LATIN SMALL LETTER QP DIGRAPH]
|
||||||
|
"\u0239" => "qp"
|
||||||
|
|
||||||
|
# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
|
||||||
|
"\u0154" => "R"
|
||||||
|
|
||||||
|
# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
|
||||||
|
"\u0156" => "R"
|
||||||
|
|
||||||
|
# Ř [LATIN CAPITAL LETTER R WITH CARON]
|
||||||
|
"\u0158" => "R"
|
||||||
|
|
||||||
|
# Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
|
||||||
|
"\u0210" => "R"
|
||||||
|
|
||||||
|
# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
|
||||||
|
"\u0212" => "R"
|
||||||
|
|
||||||
|
# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
|
||||||
|
"\u024C" => "R"
|
||||||
|
|
||||||
|
# ʀ [LATIN LETTER SMALL CAPITAL R]
|
||||||
|
"\u0280" => "R"
|
||||||
|
|
||||||
|
# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
|
||||||
|
"\u0281" => "R"
|
||||||
|
|
||||||
|
# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
|
||||||
|
"\u1D19" => "R"
|
||||||
|
|
||||||
|
# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
|
||||||
|
"\u1D1A" => "R"
|
||||||
|
|
||||||
|
# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
|
||||||
|
"\u1E58" => "R"
|
||||||
|
|
||||||
|
# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
|
||||||
|
"\u1E5A" => "R"
|
||||||
|
|
||||||
|
# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
|
||||||
|
"\u1E5C" => "R"
|
||||||
|
|
||||||
|
# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
|
||||||
|
"\u1E5E" => "R"
|
||||||
|
|
||||||
|
# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
|
||||||
|
"\u24C7" => "R"
|
||||||
|
|
||||||
|
# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
|
||||||
|
"\u2C64" => "R"
|
||||||
|
|
||||||
|
# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
|
||||||
|
"\uA75A" => "R"
|
||||||
|
|
||||||
|
# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
|
||||||
|
"\uA782" => "R"
|
||||||
|
|
||||||
|
# R [FULLWIDTH LATIN CAPITAL LETTER R]
|
||||||
|
"\uFF32" => "R"
|
||||||
|
|
||||||
|
# ŕ [LATIN SMALL LETTER R WITH ACUTE]
|
||||||
|
"\u0155" => "r"
|
||||||
|
|
||||||
|
# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
|
||||||
|
"\u0157" => "r"
|
||||||
|
|
||||||
|
# ř [LATIN SMALL LETTER R WITH CARON]
|
||||||
|
"\u0159" => "r"
|
||||||
|
|
||||||
|
# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
|
||||||
|
"\u0211" => "r"
|
||||||
|
|
||||||
|
# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
|
||||||
|
"\u0213" => "r"
|
||||||
|
|
||||||
|
# ɍ [LATIN SMALL LETTER R WITH STROKE]
|
||||||
|
"\u024D" => "r"
|
||||||
|
|
||||||
|
# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
|
||||||
|
"\u027C" => "r"
|
||||||
|
|
||||||
|
# ɽ [LATIN SMALL LETTER R WITH TAIL]
|
||||||
|
"\u027D" => "r"
|
||||||
|
|
||||||
|
# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
|
||||||
|
"\u027E" => "r"
|
||||||
|
|
||||||
|
# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
|
||||||
|
"\u027F" => "r"
|
||||||
|
|
||||||
|
# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
|
||||||
|
"\u1D63" => "r"
|
||||||
|
|
||||||
|
# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
|
||||||
|
"\u1D72" => "r"
|
||||||
|
|
||||||
|
# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
|
||||||
|
"\u1D73" => "r"
|
||||||
|
|
||||||
|
# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
|
||||||
|
"\u1D89" => "r"
|
||||||
|
|
||||||
|
# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
|
||||||
|
"\u1E59" => "r"
|
||||||
|
|
||||||
|
# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
|
||||||
|
"\u1E5B" => "r"
|
||||||
|
|
||||||
|
# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
|
||||||
|
"\u1E5D" => "r"
|
||||||
|
|
||||||
|
# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
|
||||||
|
"\u1E5F" => "r"
|
||||||
|
|
||||||
|
# ⓡ [CIRCLED LATIN SMALL LETTER R]
|
||||||
|
"\u24E1" => "r"
|
||||||
|
|
||||||
|
# ꝛ [LATIN SMALL LETTER R ROTUNDA]
|
||||||
|
"\uA75B" => "r"
|
||||||
|
|
||||||
|
# ꞃ [LATIN SMALL LETTER INSULAR R]
|
||||||
|
"\uA783" => "r"
|
||||||
|
|
||||||
|
# r [FULLWIDTH LATIN SMALL LETTER R]
|
||||||
|
"\uFF52" => "r"
|
||||||
|
|
||||||
|
# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
|
||||||
|
"\u24AD" => "(r)"
|
||||||
|
|
||||||
|
# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
|
||||||
|
"\u015A" => "S"
|
||||||
|
|
||||||
|
# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
|
||||||
|
"\u015C" => "S"
|
||||||
|
|
||||||
|
# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
|
||||||
|
"\u015E" => "S"
|
||||||
|
|
||||||
|
# Š [LATIN CAPITAL LETTER S WITH CARON]
|
||||||
|
"\u0160" => "S"
|
||||||
|
|
||||||
|
# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
|
||||||
|
"\u0218" => "S"
|
||||||
|
|
||||||
|
# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
|
||||||
|
"\u1E60" => "S"
|
||||||
|
|
||||||
|
# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
|
||||||
|
"\u1E62" => "S"
|
||||||
|
|
||||||
|
# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
|
||||||
|
"\u1E64" => "S"
|
||||||
|
|
||||||
|
# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
|
||||||
|
"\u1E66" => "S"
|
||||||
|
|
||||||
|
# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
|
||||||
|
"\u1E68" => "S"
|
||||||
|
|
||||||
|
# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
|
||||||
|
"\u24C8" => "S"
|
||||||
|
|
||||||
|
# ꜱ [LATIN LETTER SMALL CAPITAL S]
|
||||||
|
"\uA731" => "S"
|
||||||
|
|
||||||
|
# ꞅ [LATIN SMALL LETTER INSULAR S]
|
||||||
|
"\uA785" => "S"
|
||||||
|
|
||||||
|
# S [FULLWIDTH LATIN CAPITAL LETTER S]
|
||||||
|
"\uFF33" => "S"
|
||||||
|
|
||||||
|
# ś [LATIN SMALL LETTER S WITH ACUTE]
|
||||||
|
"\u015B" => "s"
|
||||||
|
|
||||||
|
# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
|
||||||
|
"\u015D" => "s"
|
||||||
|
|
||||||
|
# ş [LATIN SMALL LETTER S WITH CEDILLA]
|
||||||
|
"\u015F" => "s"
|
||||||
|
|
||||||
|
# š [LATIN SMALL LETTER S WITH CARON]
|
||||||
|
"\u0161" => "s"
|
||||||
|
|
||||||
|
# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
|
||||||
|
"\u017F" => "s"
|
||||||
|
|
||||||
|
# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
|
||||||
|
"\u0219" => "s"
|
||||||
|
|
||||||
|
# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
|
||||||
|
"\u023F" => "s"
|
||||||
|
|
||||||
|
# ʂ [LATIN SMALL LETTER S WITH HOOK]
|
||||||
|
"\u0282" => "s"
|
||||||
|
|
||||||
|
# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
|
||||||
|
"\u1D74" => "s"
|
||||||
|
|
||||||
|
# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
|
||||||
|
"\u1D8A" => "s"
|
||||||
|
|
||||||
|
# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
|
||||||
|
"\u1E61" => "s"
|
||||||
|
|
||||||
|
# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
|
||||||
|
"\u1E63" => "s"
|
||||||
|
|
||||||
|
# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
|
||||||
|
"\u1E65" => "s"
|
||||||
|
|
||||||
|
# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
|
||||||
|
"\u1E67" => "s"
|
||||||
|
|
||||||
|
# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
|
||||||
|
"\u1E69" => "s"
|
||||||
|
|
||||||
|
# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
|
||||||
|
"\u1E9C" => "s"
|
||||||
|
|
||||||
|
# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
|
||||||
|
"\u1E9D" => "s"
|
||||||
|
|
||||||
|
# ⓢ [CIRCLED LATIN SMALL LETTER S]
|
||||||
|
"\u24E2" => "s"
|
||||||
|
|
||||||
|
# Ꞅ [LATIN CAPITAL LETTER INSULAR S]
|
||||||
|
"\uA784" => "s"
|
||||||
|
|
||||||
|
# s [FULLWIDTH LATIN SMALL LETTER S]
|
||||||
|
"\uFF53" => "s"
|
||||||
|
|
||||||
|
# ẞ [LATIN CAPITAL LETTER SHARP S]
|
||||||
|
"\u1E9E" => "SS"
|
||||||
|
|
||||||
|
# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
|
||||||
|
"\u24AE" => "(s)"
|
||||||
|
|
||||||
|
# ß [LATIN SMALL LETTER SHARP S]
|
||||||
|
"\u00DF" => "ss"
|
||||||
|
|
||||||
|
# st [LATIN SMALL LIGATURE ST]
|
||||||
|
"\uFB06" => "st"
|
||||||
|
|
||||||
|
# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
|
||||||
|
"\u0162" => "T"
|
||||||
|
|
||||||
|
# Ť [LATIN CAPITAL LETTER T WITH CARON]
|
||||||
|
"\u0164" => "T"
|
||||||
|
|
||||||
|
# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
|
||||||
|
"\u0166" => "T"
|
||||||
|
|
||||||
|
# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
|
||||||
|
"\u01AC" => "T"
|
||||||
|
|
||||||
|
# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
|
||||||
|
"\u01AE" => "T"
|
||||||
|
|
||||||
|
# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
|
||||||
|
"\u021A" => "T"
|
||||||
|
|
||||||
|
# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
|
||||||
|
"\u023E" => "T"
|
||||||
|
|
||||||
|
# ᴛ [LATIN LETTER SMALL CAPITAL T]
|
||||||
|
"\u1D1B" => "T"
|
||||||
|
|
||||||
|
# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
|
||||||
|
"\u1E6A" => "T"
|
||||||
|
|
||||||
|
# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
|
||||||
|
"\u1E6C" => "T"
|
||||||
|
|
||||||
|
# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
|
||||||
|
"\u1E6E" => "T"
|
||||||
|
|
||||||
|
# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E70" => "T"
|
||||||
|
|
||||||
|
# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
|
||||||
|
"\u24C9" => "T"
|
||||||
|
|
||||||
|
# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
|
||||||
|
"\uA786" => "T"
|
||||||
|
|
||||||
|
# T [FULLWIDTH LATIN CAPITAL LETTER T]
|
||||||
|
"\uFF34" => "T"
|
||||||
|
|
||||||
|
# ţ [LATIN SMALL LETTER T WITH CEDILLA]
|
||||||
|
"\u0163" => "t"
|
||||||
|
|
||||||
|
# ť [LATIN SMALL LETTER T WITH CARON]
|
||||||
|
"\u0165" => "t"
|
||||||
|
|
||||||
|
# ŧ [LATIN SMALL LETTER T WITH STROKE]
|
||||||
|
"\u0167" => "t"
|
||||||
|
|
||||||
|
# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
|
||||||
|
"\u01AB" => "t"
|
||||||
|
|
||||||
|
# ƭ [LATIN SMALL LETTER T WITH HOOK]
|
||||||
|
"\u01AD" => "t"
|
||||||
|
|
||||||
|
# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
|
||||||
|
"\u021B" => "t"
|
||||||
|
|
||||||
|
# ȶ [LATIN SMALL LETTER T WITH CURL]
|
||||||
|
"\u0236" => "t"
|
||||||
|
|
||||||
|
# ʇ [LATIN SMALL LETTER TURNED T]
|
||||||
|
"\u0287" => "t"
|
||||||
|
|
||||||
|
# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
|
||||||
|
"\u0288" => "t"
|
||||||
|
|
||||||
|
# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
|
||||||
|
"\u1D75" => "t"
|
||||||
|
|
||||||
|
# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
|
||||||
|
"\u1E6B" => "t"
|
||||||
|
|
||||||
|
# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
|
||||||
|
"\u1E6D" => "t"
|
||||||
|
|
||||||
|
# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
|
||||||
|
"\u1E6F" => "t"
|
||||||
|
|
||||||
|
# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E71" => "t"
|
||||||
|
|
||||||
|
# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
|
||||||
|
"\u1E97" => "t"
|
||||||
|
|
||||||
|
# ⓣ [CIRCLED LATIN SMALL LETTER T]
|
||||||
|
"\u24E3" => "t"
|
||||||
|
|
||||||
|
# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
|
||||||
|
"\u2C66" => "t"
|
||||||
|
|
||||||
|
# t [FULLWIDTH LATIN SMALL LETTER T]
|
||||||
|
"\uFF54" => "t"
|
||||||
|
|
||||||
|
# Þ [LATIN CAPITAL LETTER THORN]
|
||||||
|
"\u00DE" => "TH"
|
||||||
|
|
||||||
|
# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA766" => "TH"
|
||||||
|
|
||||||
|
# Ꜩ [LATIN CAPITAL LETTER TZ]
|
||||||
|
"\uA728" => "TZ"
|
||||||
|
|
||||||
|
# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
|
||||||
|
"\u24AF" => "(t)"
|
||||||
|
|
||||||
|
# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
|
||||||
|
"\u02A8" => "tc"
|
||||||
|
|
||||||
|
# þ [LATIN SMALL LETTER THORN]
|
||||||
|
"\u00FE" => "th"
|
||||||
|
|
||||||
|
# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
|
||||||
|
"\u1D7A" => "th"
|
||||||
|
|
||||||
|
# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
|
||||||
|
"\uA767" => "th"
|
||||||
|
|
||||||
|
# ʦ [LATIN SMALL LETTER TS DIGRAPH]
|
||||||
|
"\u02A6" => "ts"
|
||||||
|
|
||||||
|
# ꜩ [LATIN SMALL LETTER TZ]
|
||||||
|
"\uA729" => "tz"
|
||||||
|
|
||||||
|
# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
|
||||||
|
"\u00D9" => "U"
|
||||||
|
|
||||||
|
# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
|
||||||
|
"\u00DA" => "U"
|
||||||
|
|
||||||
|
# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
|
||||||
|
"\u00DB" => "U"
|
||||||
|
|
||||||
|
# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
|
||||||
|
"\u00DC" => "U"
|
||||||
|
|
||||||
|
# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
|
||||||
|
"\u0168" => "U"
|
||||||
|
|
||||||
|
# Ū [LATIN CAPITAL LETTER U WITH MACRON]
|
||||||
|
"\u016A" => "U"
|
||||||
|
|
||||||
|
# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
|
||||||
|
"\u016C" => "U"
|
||||||
|
|
||||||
|
# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
|
||||||
|
"\u016E" => "U"
|
||||||
|
|
||||||
|
# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
|
||||||
|
"\u0170" => "U"
|
||||||
|
|
||||||
|
# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
|
||||||
|
"\u0172" => "U"
|
||||||
|
|
||||||
|
# Ư [LATIN CAPITAL LETTER U WITH HORN]
|
||||||
|
"\u01AF" => "U"
|
||||||
|
|
||||||
|
# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
|
||||||
|
"\u01D3" => "U"
|
||||||
|
|
||||||
|
# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
|
||||||
|
"\u01D5" => "U"
|
||||||
|
|
||||||
|
# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
|
||||||
|
"\u01D7" => "U"
|
||||||
|
|
||||||
|
# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
|
||||||
|
"\u01D9" => "U"
|
||||||
|
|
||||||
|
# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
|
||||||
|
"\u01DB" => "U"
|
||||||
|
|
||||||
|
# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
|
||||||
|
"\u0214" => "U"
|
||||||
|
|
||||||
|
# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
|
||||||
|
"\u0216" => "U"
|
||||||
|
|
||||||
|
# Ʉ [LATIN CAPITAL LETTER U BAR]
|
||||||
|
"\u0244" => "U"
|
||||||
|
|
||||||
|
# ᴜ [LATIN LETTER SMALL CAPITAL U]
|
||||||
|
"\u1D1C" => "U"
|
||||||
|
|
||||||
|
# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
|
||||||
|
"\u1D7E" => "U"
|
||||||
|
|
||||||
|
# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
|
||||||
|
"\u1E72" => "U"
|
||||||
|
|
||||||
|
# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
|
||||||
|
"\u1E74" => "U"
|
||||||
|
|
||||||
|
# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E76" => "U"
|
||||||
|
|
||||||
|
# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
|
||||||
|
"\u1E78" => "U"
|
||||||
|
|
||||||
|
# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
|
||||||
|
"\u1E7A" => "U"
|
||||||
|
|
||||||
|
# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
|
||||||
|
"\u1EE4" => "U"
|
||||||
|
|
||||||
|
# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
|
||||||
|
"\u1EE6" => "U"
|
||||||
|
|
||||||
|
# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
|
||||||
|
"\u1EE8" => "U"
|
||||||
|
|
||||||
|
# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
|
||||||
|
"\u1EEA" => "U"
|
||||||
|
|
||||||
|
# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
|
||||||
|
"\u1EEC" => "U"
|
||||||
|
|
||||||
|
# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
|
||||||
|
"\u1EEE" => "U"
|
||||||
|
|
||||||
|
# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
|
||||||
|
"\u1EF0" => "U"
|
||||||
|
|
||||||
|
# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
|
||||||
|
"\u24CA" => "U"
|
||||||
|
|
||||||
|
# Ｕ [FULLWIDTH LATIN CAPITAL LETTER U]
|
||||||
|
"\uFF35" => "U"
|
||||||
|
|
||||||
|
# ù [LATIN SMALL LETTER U WITH GRAVE]
|
||||||
|
"\u00F9" => "u"
|
||||||
|
|
||||||
|
# ú [LATIN SMALL LETTER U WITH ACUTE]
|
||||||
|
"\u00FA" => "u"
|
||||||
|
|
||||||
|
# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
|
||||||
|
"\u00FB" => "u"
|
||||||
|
|
||||||
|
# ü [LATIN SMALL LETTER U WITH DIAERESIS]
|
||||||
|
"\u00FC" => "u"
|
||||||
|
|
||||||
|
# ũ [LATIN SMALL LETTER U WITH TILDE]
|
||||||
|
"\u0169" => "u"
|
||||||
|
|
||||||
|
# ū [LATIN SMALL LETTER U WITH MACRON]
|
||||||
|
"\u016B" => "u"
|
||||||
|
|
||||||
|
# ŭ [LATIN SMALL LETTER U WITH BREVE]
|
||||||
|
"\u016D" => "u"
|
||||||
|
|
||||||
|
# ů [LATIN SMALL LETTER U WITH RING ABOVE]
|
||||||
|
"\u016F" => "u"
|
||||||
|
|
||||||
|
# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
|
||||||
|
"\u0171" => "u"
|
||||||
|
|
||||||
|
# ų [LATIN SMALL LETTER U WITH OGONEK]
|
||||||
|
"\u0173" => "u"
|
||||||
|
|
||||||
|
# ư [LATIN SMALL LETTER U WITH HORN]
|
||||||
|
"\u01B0" => "u"
|
||||||
|
|
||||||
|
# ǔ [LATIN SMALL LETTER U WITH CARON]
|
||||||
|
"\u01D4" => "u"
|
||||||
|
|
||||||
|
# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
|
||||||
|
"\u01D6" => "u"
|
||||||
|
|
||||||
|
# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
|
||||||
|
"\u01D8" => "u"
|
||||||
|
|
||||||
|
# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
|
||||||
|
"\u01DA" => "u"
|
||||||
|
|
||||||
|
# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
|
||||||
|
"\u01DC" => "u"
|
||||||
|
|
||||||
|
# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
|
||||||
|
"\u0215" => "u"
|
||||||
|
|
||||||
|
# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
|
||||||
|
"\u0217" => "u"
|
||||||
|
|
||||||
|
# ʉ [LATIN SMALL LETTER U BAR]
|
||||||
|
"\u0289" => "u"
|
||||||
|
|
||||||
|
# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
|
||||||
|
"\u1D64" => "u"
|
||||||
|
|
||||||
|
# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
|
||||||
|
"\u1D99" => "u"
|
||||||
|
|
||||||
|
# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
|
||||||
|
"\u1E73" => "u"
|
||||||
|
|
||||||
|
# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
|
||||||
|
"\u1E75" => "u"
|
||||||
|
|
||||||
|
# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
|
||||||
|
"\u1E77" => "u"
|
||||||
|
|
||||||
|
# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
|
||||||
|
"\u1E79" => "u"
|
||||||
|
|
||||||
|
# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
|
||||||
|
"\u1E7B" => "u"
|
||||||
|
|
||||||
|
# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
|
||||||
|
"\u1EE5" => "u"
|
||||||
|
|
||||||
|
# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
|
||||||
|
"\u1EE7" => "u"
|
||||||
|
|
||||||
|
# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
|
||||||
|
"\u1EE9" => "u"
|
||||||
|
|
||||||
|
# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
|
||||||
|
"\u1EEB" => "u"
|
||||||
|
|
||||||
|
# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
|
||||||
|
"\u1EED" => "u"
|
||||||
|
|
||||||
|
# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
|
||||||
|
"\u1EEF" => "u"
|
||||||
|
|
||||||
|
# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
|
||||||
|
"\u1EF1" => "u"
|
||||||
|
|
||||||
|
# ⓤ [CIRCLED LATIN SMALL LETTER U]
|
||||||
|
"\u24E4" => "u"
|
||||||
|
|
||||||
|
# ｕ [FULLWIDTH LATIN SMALL LETTER U]
|
||||||
|
"\uFF55" => "u"
|
||||||
|
|
||||||
|
# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
|
||||||
|
"\u24B0" => "(u)"
|
||||||
|
|
||||||
|
# ᵫ [LATIN SMALL LETTER UE]
|
||||||
|
"\u1D6B" => "ue"
|
||||||
|
|
||||||
|
# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
|
||||||
|
"\u01B2" => "V"
|
||||||
|
|
||||||
|
# Ʌ [LATIN CAPITAL LETTER TURNED V]
|
||||||
|
"\u0245" => "V"
|
||||||
|
|
||||||
|
# ᴠ [LATIN LETTER SMALL CAPITAL V]
|
||||||
|
"\u1D20" => "V"
|
||||||
|
|
||||||
|
# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
|
||||||
|
"\u1E7C" => "V"
|
||||||
|
|
||||||
|
# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
|
||||||
|
"\u1E7E" => "V"
|
||||||
|
|
||||||
|
# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
|
||||||
|
"\u1EFC" => "V"
|
||||||
|
|
||||||
|
# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
|
||||||
|
"\u24CB" => "V"
|
||||||
|
|
||||||
|
# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
|
||||||
|
"\uA75E" => "V"
|
||||||
|
|
||||||
|
# Ꝩ [LATIN CAPITAL LETTER VEND]
|
||||||
|
"\uA768" => "V"
|
||||||
|
|
||||||
|
# Ｖ [FULLWIDTH LATIN CAPITAL LETTER V]
|
||||||
|
"\uFF36" => "V"
|
||||||
|
|
||||||
|
# ʋ [LATIN SMALL LETTER V WITH HOOK]
|
||||||
|
"\u028B" => "v"
|
||||||
|
|
||||||
|
# ʌ [LATIN SMALL LETTER TURNED V]
|
||||||
|
"\u028C" => "v"
|
||||||
|
|
||||||
|
# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
|
||||||
|
"\u1D65" => "v"
|
||||||
|
|
||||||
|
# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
|
||||||
|
"\u1D8C" => "v"
|
||||||
|
|
||||||
|
# ṽ [LATIN SMALL LETTER V WITH TILDE]
|
||||||
|
"\u1E7D" => "v"
|
||||||
|
|
||||||
|
# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
|
||||||
|
"\u1E7F" => "v"
|
||||||
|
|
||||||
|
# ⓥ [CIRCLED LATIN SMALL LETTER V]
|
||||||
|
"\u24E5" => "v"
|
||||||
|
|
||||||
|
# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
|
||||||
|
"\u2C71" => "v"
|
||||||
|
|
||||||
|
# ⱴ [LATIN SMALL LETTER V WITH CURL]
|
||||||
|
"\u2C74" => "v"
|
||||||
|
|
||||||
|
# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
|
||||||
|
"\uA75F" => "v"
|
||||||
|
|
||||||
|
# ｖ [FULLWIDTH LATIN SMALL LETTER V]
|
||||||
|
"\uFF56" => "v"
|
||||||
|
|
||||||
|
# Ꝡ [LATIN CAPITAL LETTER VY]
|
||||||
|
"\uA760" => "VY"
|
||||||
|
|
||||||
|
# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
|
||||||
|
"\u24B1" => "(v)"
|
||||||
|
|
||||||
|
# ꝡ [LATIN SMALL LETTER VY]
|
||||||
|
"\uA761" => "vy"
|
||||||
|
|
||||||
|
# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
|
||||||
|
"\u0174" => "W"
|
||||||
|
|
||||||
|
# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
|
||||||
|
"\u01F7" => "W"
|
||||||
|
|
||||||
|
# ᴡ [LATIN LETTER SMALL CAPITAL W]
|
||||||
|
"\u1D21" => "W"
|
||||||
|
|
||||||
|
# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
|
||||||
|
"\u1E80" => "W"
|
||||||
|
|
||||||
|
# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
|
||||||
|
"\u1E82" => "W"
|
||||||
|
|
||||||
|
# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
|
||||||
|
"\u1E84" => "W"
|
||||||
|
|
||||||
|
# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
|
||||||
|
"\u1E86" => "W"
|
||||||
|
|
||||||
|
# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
|
||||||
|
"\u1E88" => "W"
|
||||||
|
|
||||||
|
# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
|
||||||
|
"\u24CC" => "W"
|
||||||
|
|
||||||
|
# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
|
||||||
|
"\u2C72" => "W"
|
||||||
|
|
||||||
|
# Ｗ [FULLWIDTH LATIN CAPITAL LETTER W]
|
||||||
|
"\uFF37" => "W"
|
||||||
|
|
||||||
|
# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
|
||||||
|
"\u0175" => "w"
|
||||||
|
|
||||||
|
# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
|
||||||
|
"\u01BF" => "w"
|
||||||
|
|
||||||
|
# ʍ [LATIN SMALL LETTER TURNED W]
|
||||||
|
"\u028D" => "w"
|
||||||
|
|
||||||
|
# ẁ [LATIN SMALL LETTER W WITH GRAVE]
|
||||||
|
"\u1E81" => "w"
|
||||||
|
|
||||||
|
# ẃ [LATIN SMALL LETTER W WITH ACUTE]
|
||||||
|
"\u1E83" => "w"
|
||||||
|
|
||||||
|
# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
|
||||||
|
"\u1E85" => "w"
|
||||||
|
|
||||||
|
# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
|
||||||
|
"\u1E87" => "w"
|
||||||
|
|
||||||
|
# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
|
||||||
|
"\u1E89" => "w"
|
||||||
|
|
||||||
|
# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
|
||||||
|
"\u1E98" => "w"
|
||||||
|
|
||||||
|
# ⓦ [CIRCLED LATIN SMALL LETTER W]
|
||||||
|
"\u24E6" => "w"
|
||||||
|
|
||||||
|
# ⱳ [LATIN SMALL LETTER W WITH HOOK]
|
||||||
|
"\u2C73" => "w"
|
||||||
|
|
||||||
|
# ｗ [FULLWIDTH LATIN SMALL LETTER W]
|
||||||
|
"\uFF57" => "w"
|
||||||
|
|
||||||
|
# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
|
||||||
|
"\u24B2" => "(w)"
|
||||||
|
|
||||||
|
# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
|
||||||
|
"\u1E8A" => "X"
|
||||||
|
|
||||||
|
# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
|
||||||
|
"\u1E8C" => "X"
|
||||||
|
|
||||||
|
# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
|
||||||
|
"\u24CD" => "X"
|
||||||
|
|
||||||
|
# Ｘ [FULLWIDTH LATIN CAPITAL LETTER X]
|
||||||
|
"\uFF38" => "X"
|
||||||
|
|
||||||
|
# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
|
||||||
|
"\u1D8D" => "x"
|
||||||
|
|
||||||
|
# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
|
||||||
|
"\u1E8B" => "x"
|
||||||
|
|
||||||
|
# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
|
||||||
|
"\u1E8D" => "x"
|
||||||
|
|
||||||
|
# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
|
||||||
|
"\u2093" => "x"
|
||||||
|
|
||||||
|
# ⓧ [CIRCLED LATIN SMALL LETTER X]
|
||||||
|
"\u24E7" => "x"
|
||||||
|
|
||||||
|
# ｘ [FULLWIDTH LATIN SMALL LETTER X]
|
||||||
|
"\uFF58" => "x"
|
||||||
|
|
||||||
|
# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
|
||||||
|
"\u24B3" => "(x)"
|
||||||
|
|
||||||
|
# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
|
||||||
|
"\u00DD" => "Y"
|
||||||
|
|
||||||
|
# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
|
||||||
|
"\u0176" => "Y"
|
||||||
|
|
||||||
|
# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
|
||||||
|
"\u0178" => "Y"
|
||||||
|
|
||||||
|
# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
|
||||||
|
"\u01B3" => "Y"
|
||||||
|
|
||||||
|
# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
|
||||||
|
"\u0232" => "Y"
|
||||||
|
|
||||||
|
# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
|
||||||
|
"\u024E" => "Y"
|
||||||
|
|
||||||
|
# ʏ [LATIN LETTER SMALL CAPITAL Y]
|
||||||
|
"\u028F" => "Y"
|
||||||
|
|
||||||
|
# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
|
||||||
|
"\u1E8E" => "Y"
|
||||||
|
|
||||||
|
# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
|
||||||
|
"\u1EF2" => "Y"
|
||||||
|
|
||||||
|
# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
|
||||||
|
"\u1EF4" => "Y"
|
||||||
|
|
||||||
|
# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
|
||||||
|
"\u1EF6" => "Y"
|
||||||
|
|
||||||
|
# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
|
||||||
|
"\u1EF8" => "Y"
|
||||||
|
|
||||||
|
# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
|
||||||
|
"\u1EFE" => "Y"
|
||||||
|
|
||||||
|
# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
|
||||||
|
"\u24CE" => "Y"
|
||||||
|
|
||||||
|
# Ｙ [FULLWIDTH LATIN CAPITAL LETTER Y]
|
||||||
|
"\uFF39" => "Y"
|
||||||
|
|
||||||
|
# ý [LATIN SMALL LETTER Y WITH ACUTE]
|
||||||
|
"\u00FD" => "y"
|
||||||
|
|
||||||
|
# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
|
||||||
|
"\u00FF" => "y"
|
||||||
|
|
||||||
|
# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
|
||||||
|
"\u0177" => "y"
|
||||||
|
|
||||||
|
# ƴ [LATIN SMALL LETTER Y WITH HOOK]
|
||||||
|
"\u01B4" => "y"
|
||||||
|
|
||||||
|
# ȳ [LATIN SMALL LETTER Y WITH MACRON]
|
||||||
|
"\u0233" => "y"
|
||||||
|
|
||||||
|
# ɏ [LATIN SMALL LETTER Y WITH STROKE]
|
||||||
|
"\u024F" => "y"
|
||||||
|
|
||||||
|
# ʎ [LATIN SMALL LETTER TURNED Y]
|
||||||
|
"\u028E" => "y"
|
||||||
|
|
||||||
|
# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
|
||||||
|
"\u1E8F" => "y"
|
||||||
|
|
||||||
|
# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
|
||||||
|
"\u1E99" => "y"
|
||||||
|
|
||||||
|
# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
|
||||||
|
"\u1EF3" => "y"
|
||||||
|
|
||||||
|
# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
|
||||||
|
"\u1EF5" => "y"
|
||||||
|
|
||||||
|
# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
|
||||||
|
"\u1EF7" => "y"
|
||||||
|
|
||||||
|
# ỹ [LATIN SMALL LETTER Y WITH TILDE]
|
||||||
|
"\u1EF9" => "y"
|
||||||
|
|
||||||
|
# ỿ [LATIN SMALL LETTER Y WITH LOOP]
|
||||||
|
"\u1EFF" => "y"
|
||||||
|
|
||||||
|
# ⓨ [CIRCLED LATIN SMALL LETTER Y]
|
||||||
|
"\u24E8" => "y"
|
||||||
|
|
||||||
|
# ｙ [FULLWIDTH LATIN SMALL LETTER Y]
|
||||||
|
"\uFF59" => "y"
|
||||||
|
|
||||||
|
# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
|
||||||
|
"\u24B4" => "(y)"
|
||||||
|
|
||||||
|
# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
|
||||||
|
"\u0179" => "Z"
|
||||||
|
|
||||||
|
# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
|
||||||
|
"\u017B" => "Z"
|
||||||
|
|
||||||
|
# Ž [LATIN CAPITAL LETTER Z WITH CARON]
|
||||||
|
"\u017D" => "Z"
|
||||||
|
|
||||||
|
# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
|
||||||
|
"\u01B5" => "Z"
|
||||||
|
|
||||||
|
# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
|
||||||
|
"\u021C" => "Z"
|
||||||
|
|
||||||
|
# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
|
||||||
|
"\u0224" => "Z"
|
||||||
|
|
||||||
|
# ᴢ [LATIN LETTER SMALL CAPITAL Z]
|
||||||
|
"\u1D22" => "Z"
|
||||||
|
|
||||||
|
# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
|
||||||
|
"\u1E90" => "Z"
|
||||||
|
|
||||||
|
# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
|
||||||
|
"\u1E92" => "Z"
|
||||||
|
|
||||||
|
# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
|
||||||
|
"\u1E94" => "Z"
|
||||||
|
|
||||||
|
# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
|
||||||
|
"\u24CF" => "Z"
|
||||||
|
|
||||||
|
# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
|
||||||
|
"\u2C6B" => "Z"
|
||||||
|
|
||||||
|
# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
|
||||||
|
"\uA762" => "Z"
|
||||||
|
|
||||||
|
# Ｚ [FULLWIDTH LATIN CAPITAL LETTER Z]
|
||||||
|
"\uFF3A" => "Z"
|
||||||
|
|
||||||
|
# ź [LATIN SMALL LETTER Z WITH ACUTE]
|
||||||
|
"\u017A" => "z"
|
||||||
|
|
||||||
|
# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
|
||||||
|
"\u017C" => "z"
|
||||||
|
|
||||||
|
# ž [LATIN SMALL LETTER Z WITH CARON]
|
||||||
|
"\u017E" => "z"
|
||||||
|
|
||||||
|
# ƶ [LATIN SMALL LETTER Z WITH STROKE]
|
||||||
|
"\u01B6" => "z"
|
||||||
|
|
||||||
|
# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
|
||||||
|
"\u021D" => "z"
|
||||||
|
|
||||||
|
# ȥ [LATIN SMALL LETTER Z WITH HOOK]
|
||||||
|
"\u0225" => "z"
|
||||||
|
|
||||||
|
# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
|
||||||
|
"\u0240" => "z"
|
||||||
|
|
||||||
|
# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
|
||||||
|
"\u0290" => "z"
|
||||||
|
|
||||||
|
# ʑ [LATIN SMALL LETTER Z WITH CURL]
|
||||||
|
"\u0291" => "z"
|
||||||
|
|
||||||
|
# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
|
||||||
|
"\u1D76" => "z"
|
||||||
|
|
||||||
|
# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
|
||||||
|
"\u1D8E" => "z"
|
||||||
|
|
||||||
|
# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
|
||||||
|
"\u1E91" => "z"
|
||||||
|
|
||||||
|
# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
|
||||||
|
"\u1E93" => "z"
|
||||||
|
|
||||||
|
# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
|
||||||
|
"\u1E95" => "z"
|
||||||
|
|
||||||
|
# ⓩ [CIRCLED LATIN SMALL LETTER Z]
|
||||||
|
"\u24E9" => "z"
|
||||||
|
|
||||||
|
# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
|
||||||
|
"\u2C6C" => "z"
|
||||||
|
|
||||||
|
# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
|
||||||
|
"\uA763" => "z"
|
||||||
|
|
||||||
|
# ｚ [FULLWIDTH LATIN SMALL LETTER Z]
|
||||||
|
"\uFF5A" => "z"
|
||||||
|
|
||||||
|
# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
|
||||||
|
"\u24B5" => "(z)"
|
||||||
|
|
||||||
|
# ⁰ [SUPERSCRIPT ZERO]
|
||||||
|
"\u2070" => "0"
|
||||||
|
|
||||||
|
# ₀ [SUBSCRIPT ZERO]
|
||||||
|
"\u2080" => "0"
|
||||||
|
|
||||||
|
# ⓪ [CIRCLED DIGIT ZERO]
|
||||||
|
"\u24EA" => "0"
|
||||||
|
|
||||||
|
# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
|
||||||
|
"\u24FF" => "0"
|
||||||
|
|
||||||
|
# ０ [FULLWIDTH DIGIT ZERO]
|
||||||
|
"\uFF10" => "0"
|
||||||
|
|
||||||
|
# ¹ [SUPERSCRIPT ONE]
|
||||||
|
"\u00B9" => "1"
|
||||||
|
|
||||||
|
# ₁ [SUBSCRIPT ONE]
|
||||||
|
"\u2081" => "1"
|
||||||
|
|
||||||
|
# ① [CIRCLED DIGIT ONE]
|
||||||
|
"\u2460" => "1"
|
||||||
|
|
||||||
|
# ⓵ [DOUBLE CIRCLED DIGIT ONE]
|
||||||
|
"\u24F5" => "1"
|
||||||
|
|
||||||
|
# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
|
||||||
|
"\u2776" => "1"
|
||||||
|
|
||||||
|
# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
|
||||||
|
"\u2780" => "1"
|
||||||
|
|
||||||
|
# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
|
||||||
|
"\u278A" => "1"
|
||||||
|
|
||||||
|
# １ [FULLWIDTH DIGIT ONE]
|
||||||
|
"\uFF11" => "1"
|
||||||
|
|
||||||
|
# ⒈ [DIGIT ONE FULL STOP]
|
||||||
|
"\u2488" => "1."
|
||||||
|
|
||||||
|
# ⑴ [PARENTHESIZED DIGIT ONE]
|
||||||
|
"\u2474" => "(1)"
|
||||||
|
|
||||||
|
# ² [SUPERSCRIPT TWO]
|
||||||
|
"\u00B2" => "2"
|
||||||
|
|
||||||
|
# ₂ [SUBSCRIPT TWO]
|
||||||
|
"\u2082" => "2"
|
||||||
|
|
||||||
|
# ② [CIRCLED DIGIT TWO]
|
||||||
|
"\u2461" => "2"
|
||||||
|
|
||||||
|
# ⓶ [DOUBLE CIRCLED DIGIT TWO]
|
||||||
|
"\u24F6" => "2"
|
||||||
|
|
||||||
|
# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
|
||||||
|
"\u2777" => "2"
|
||||||
|
|
||||||
|
# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
|
||||||
|
"\u2781" => "2"
|
||||||
|
|
||||||
|
# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
|
||||||
|
"\u278B" => "2"
|
||||||
|
|
||||||
|
# ２ [FULLWIDTH DIGIT TWO]
|
||||||
|
"\uFF12" => "2"
|
||||||
|
|
||||||
|
# ⒉ [DIGIT TWO FULL STOP]
|
||||||
|
"\u2489" => "2."
|
||||||
|
|
||||||
|
# ⑵ [PARENTHESIZED DIGIT TWO]
|
||||||
|
"\u2475" => "(2)"
|
||||||
|
|
||||||
|
# ³ [SUPERSCRIPT THREE]
|
||||||
|
"\u00B3" => "3"
|
||||||
|
|
||||||
|
# ₃ [SUBSCRIPT THREE]
|
||||||
|
"\u2083" => "3"
|
||||||
|
|
||||||
|
# ③ [CIRCLED DIGIT THREE]
|
||||||
|
"\u2462" => "3"
|
||||||
|
|
||||||
|
# ⓷ [DOUBLE CIRCLED DIGIT THREE]
|
||||||
|
"\u24F7" => "3"
|
||||||
|
|
||||||
|
# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
|
||||||
|
"\u2778" => "3"
|
||||||
|
|
||||||
|
# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
|
||||||
|
"\u2782" => "3"
|
||||||
|
|
||||||
|
# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
|
||||||
|
"\u278C" => "3"
|
||||||
|
|
||||||
|
# ３ [FULLWIDTH DIGIT THREE]
|
||||||
|
"\uFF13" => "3"
|
||||||
|
|
||||||
|
# ⒊ [DIGIT THREE FULL STOP]
|
||||||
|
"\u248A" => "3."
|
||||||
|
|
||||||
|
# ⑶ [PARENTHESIZED DIGIT THREE]
|
||||||
|
"\u2476" => "(3)"
|
||||||
|
|
||||||
|
# ⁴ [SUPERSCRIPT FOUR]
|
||||||
|
"\u2074" => "4"
|
||||||
|
|
||||||
|
# ₄ [SUBSCRIPT FOUR]
|
||||||
|
"\u2084" => "4"
|
||||||
|
|
||||||
|
# ④ [CIRCLED DIGIT FOUR]
|
||||||
|
"\u2463" => "4"
|
||||||
|
|
||||||
|
# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
|
||||||
|
"\u24F8" => "4"
|
||||||
|
|
||||||
|
# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
|
||||||
|
"\u2779" => "4"
|
||||||
|
|
||||||
|
# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
|
||||||
|
"\u2783" => "4"
|
||||||
|
|
||||||
|
# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
|
||||||
|
"\u278D" => "4"
|
||||||
|
|
||||||
|
# ４ [FULLWIDTH DIGIT FOUR]
|
||||||
|
"\uFF14" => "4"
|
||||||
|
|
||||||
|
# ⒋ [DIGIT FOUR FULL STOP]
|
||||||
|
"\u248B" => "4."
|
||||||
|
|
||||||
|
# ⑷ [PARENTHESIZED DIGIT FOUR]
|
||||||
|
"\u2477" => "(4)"
|
||||||
|
|
||||||
|
# ⁵ [SUPERSCRIPT FIVE]
|
||||||
|
"\u2075" => "5"
|
||||||
|
|
||||||
|
# ₅ [SUBSCRIPT FIVE]
|
||||||
|
"\u2085" => "5"
|
||||||
|
|
||||||
|
# ⑤ [CIRCLED DIGIT FIVE]
|
||||||
|
"\u2464" => "5"
|
||||||
|
|
||||||
|
# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
|
||||||
|
"\u24F9" => "5"
|
||||||
|
|
||||||
|
# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
|
||||||
|
"\u277A" => "5"
|
||||||
|
|
||||||
|
# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
|
||||||
|
"\u2784" => "5"
|
||||||
|
|
||||||
|
# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
|
||||||
|
"\u278E" => "5"
|
||||||
|
|
||||||
|
# ５ [FULLWIDTH DIGIT FIVE]
|
||||||
|
"\uFF15" => "5"
|
||||||
|
|
||||||
|
# ⒌ [DIGIT FIVE FULL STOP]
|
||||||
|
"\u248C" => "5."
|
||||||
|
|
||||||
|
# ⑸ [PARENTHESIZED DIGIT FIVE]
|
||||||
|
"\u2478" => "(5)"
|
||||||
|
|
||||||
|
# ⁶ [SUPERSCRIPT SIX]
|
||||||
|
"\u2076" => "6"
|
||||||
|
|
||||||
|
# ₆ [SUBSCRIPT SIX]
|
||||||
|
"\u2086" => "6"
|
||||||
|
|
||||||
|
# ⑥ [CIRCLED DIGIT SIX]
|
||||||
|
"\u2465" => "6"
|
||||||
|
|
||||||
|
# ⓺ [DOUBLE CIRCLED DIGIT SIX]
|
||||||
|
"\u24FA" => "6"
|
||||||
|
|
||||||
|
# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
|
||||||
|
"\u277B" => "6"
|
||||||
|
|
||||||
|
# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
|
||||||
|
"\u2785" => "6"
|
||||||
|
|
||||||
|
# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
|
||||||
|
"\u278F" => "6"
|
||||||
|
|
||||||
|
# ６ [FULLWIDTH DIGIT SIX]
|
||||||
|
"\uFF16" => "6"
|
||||||
|
|
||||||
|
# ⒍ [DIGIT SIX FULL STOP]
|
||||||
|
"\u248D" => "6."
|
||||||
|
|
||||||
|
# ⑹ [PARENTHESIZED DIGIT SIX]
|
||||||
|
"\u2479" => "(6)"
|
||||||
|
|
||||||
|
# ⁷ [SUPERSCRIPT SEVEN]
|
||||||
|
"\u2077" => "7"
|
||||||
|
|
||||||
|
# ₇ [SUBSCRIPT SEVEN]
|
||||||
|
"\u2087" => "7"
|
||||||
|
|
||||||
|
# ⑦ [CIRCLED DIGIT SEVEN]
|
||||||
|
"\u2466" => "7"
|
||||||
|
|
||||||
|
# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
|
||||||
|
"\u24FB" => "7"
|
||||||
|
|
||||||
|
# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
|
||||||
|
"\u277C" => "7"
|
||||||
|
|
||||||
|
# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
|
||||||
|
"\u2786" => "7"
|
||||||
|
|
||||||
|
# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
|
||||||
|
"\u2790" => "7"
|
||||||
|
|
||||||
|
# ７ [FULLWIDTH DIGIT SEVEN]
|
||||||
|
"\uFF17" => "7"
|
||||||
|
|
||||||
|
# ⒎ [DIGIT SEVEN FULL STOP]
|
||||||
|
"\u248E" => "7."
|
||||||
|
|
||||||
|
# ⑺ [PARENTHESIZED DIGIT SEVEN]
|
||||||
|
"\u247A" => "(7)"
|
||||||
|
|
||||||
|
# ⁸ [SUPERSCRIPT EIGHT]
|
||||||
|
"\u2078" => "8"
|
||||||
|
|
||||||
|
# ₈ [SUBSCRIPT EIGHT]
|
||||||
|
"\u2088" => "8"
|
||||||
|
|
||||||
|
# ⑧ [CIRCLED DIGIT EIGHT]
|
||||||
|
"\u2467" => "8"
|
||||||
|
|
||||||
|
# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
|
||||||
|
"\u24FC" => "8"
|
||||||
|
|
||||||
|
# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
|
||||||
|
"\u277D" => "8"
|
||||||
|
|
||||||
|
# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
|
||||||
|
"\u2787" => "8"
|
||||||
|
|
||||||
|
# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
|
||||||
|
"\u2791" => "8"
|
||||||
|
|
||||||
|
# ８ [FULLWIDTH DIGIT EIGHT]
|
||||||
|
"\uFF18" => "8"
|
||||||
|
|
||||||
|
# ⒏ [DIGIT EIGHT FULL STOP]
|
||||||
|
"\u248F" => "8."
|
||||||
|
|
||||||
|
# ⑻ [PARENTHESIZED DIGIT EIGHT]
|
||||||
|
"\u247B" => "(8)"
|
||||||
|
|
||||||
|
# ⁹ [SUPERSCRIPT NINE]
|
||||||
|
"\u2079" => "9"
|
||||||
|
|
||||||
|
# ₉ [SUBSCRIPT NINE]
|
||||||
|
"\u2089" => "9"
|
||||||
|
|
||||||
|
# ⑨ [CIRCLED DIGIT NINE]
|
||||||
|
"\u2468" => "9"
|
||||||
|
|
||||||
|
# ⓽ [DOUBLE CIRCLED DIGIT NINE]
|
||||||
|
"\u24FD" => "9"
|
||||||
|
|
||||||
|
# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
|
||||||
|
"\u277E" => "9"
|
||||||
|
|
||||||
|
# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
|
||||||
|
"\u2788" => "9"
|
||||||
|
|
||||||
|
# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
|
||||||
|
"\u2792" => "9"
|
||||||
|
|
||||||
|
# ９ [FULLWIDTH DIGIT NINE]
|
||||||
|
"\uFF19" => "9"
|
||||||
|
|
||||||
|
# ⒐ [DIGIT NINE FULL STOP]
|
||||||
|
"\u2490" => "9."
|
||||||
|
|
||||||
|
# ⑼ [PARENTHESIZED DIGIT NINE]
|
||||||
|
"\u247C" => "(9)"
|
||||||
|
|
||||||
|
# ⑩ [CIRCLED NUMBER TEN]
|
||||||
|
"\u2469" => "10"
|
||||||
|
|
||||||
|
# ⓾ [DOUBLE CIRCLED NUMBER TEN]
|
||||||
|
"\u24FE" => "10"
|
||||||
|
|
||||||
|
# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
|
||||||
|
"\u277F" => "10"
|
||||||
|
|
||||||
|
# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
|
||||||
|
"\u2789" => "10"
|
||||||
|
|
||||||
|
# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
|
||||||
|
"\u2793" => "10"
|
||||||
|
|
||||||
|
# ⒑ [NUMBER TEN FULL STOP]
|
||||||
|
"\u2491" => "10."
|
||||||
|
|
||||||
|
# ⑽ [PARENTHESIZED NUMBER TEN]
|
||||||
|
"\u247D" => "(10)"
|
||||||
|
|
||||||
|
# ⑪ [CIRCLED NUMBER ELEVEN]
|
||||||
|
"\u246A" => "11"
|
||||||
|
|
||||||
|
# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
|
||||||
|
"\u24EB" => "11"
|
||||||
|
|
||||||
|
# ⒒ [NUMBER ELEVEN FULL STOP]
|
||||||
|
"\u2492" => "11."
|
||||||
|
|
||||||
|
# ⑾ [PARENTHESIZED NUMBER ELEVEN]
|
||||||
|
"\u247E" => "(11)"
|
||||||
|
|
||||||
|
# ⑫ [CIRCLED NUMBER TWELVE]
|
||||||
|
"\u246B" => "12"
|
||||||
|
|
||||||
|
# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
|
||||||
|
"\u24EC" => "12"
|
||||||
|
|
||||||
|
# ⒓ [NUMBER TWELVE FULL STOP]
|
||||||
|
"\u2493" => "12."
|
||||||
|
|
||||||
|
# ⑿ [PARENTHESIZED NUMBER TWELVE]
|
||||||
|
"\u247F" => "(12)"
|
||||||
|
|
||||||
|
# ⑬ [CIRCLED NUMBER THIRTEEN]
|
||||||
|
"\u246C" => "13"
|
||||||
|
|
||||||
|
# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
|
||||||
|
"\u24ED" => "13"
|
||||||
|
|
||||||
|
# ⒔ [NUMBER THIRTEEN FULL STOP]
|
||||||
|
"\u2494" => "13."
|
||||||
|
|
||||||
|
# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
|
||||||
|
"\u2480" => "(13)"
|
||||||
|
|
||||||
|
# ⑭ [CIRCLED NUMBER FOURTEEN]
|
||||||
|
"\u246D" => "14"
|
||||||
|
|
||||||
|
# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
|
||||||
|
"\u24EE" => "14"
|
||||||
|
|
||||||
|
# ⒕ [NUMBER FOURTEEN FULL STOP]
|
||||||
|
"\u2495" => "14."
|
||||||
|
|
||||||
|
# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
|
||||||
|
"\u2481" => "(14)"
|
||||||
|
|
||||||
|
# ⑮ [CIRCLED NUMBER FIFTEEN]
|
||||||
|
"\u246E" => "15"
|
||||||
|
|
||||||
|
# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
|
||||||
|
"\u24EF" => "15"
|
||||||
|
|
||||||
|
# ⒖ [NUMBER FIFTEEN FULL STOP]
|
||||||
|
"\u2496" => "15."
|
||||||
|
|
||||||
|
# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
|
||||||
|
"\u2482" => "(15)"
|
||||||
|
|
||||||
|
# ⑯ [CIRCLED NUMBER SIXTEEN]
|
||||||
|
"\u246F" => "16"
|
||||||
|
|
||||||
|
# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
|
||||||
|
"\u24F0" => "16"
|
||||||
|
|
||||||
|
# ⒗ [NUMBER SIXTEEN FULL STOP]
|
||||||
|
"\u2497" => "16."
|
||||||
|
|
||||||
|
# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
|
||||||
|
"\u2483" => "(16)"
|
||||||
|
|
||||||
|
# ⑰ [CIRCLED NUMBER SEVENTEEN]
|
||||||
|
"\u2470" => "17"
|
||||||
|
|
||||||
|
# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
|
||||||
|
"\u24F1" => "17"
|
||||||
|
|
||||||
|
# ⒘ [NUMBER SEVENTEEN FULL STOP]
|
||||||
|
"\u2498" => "17."
|
||||||
|
|
||||||
|
# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
|
||||||
|
"\u2484" => "(17)"
|
||||||
|
|
||||||
|
# ⑱ [CIRCLED NUMBER EIGHTEEN]
|
||||||
|
"\u2471" => "18"
|
||||||
|
|
||||||
|
# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
|
||||||
|
"\u24F2" => "18"
|
||||||
|
|
||||||
|
# ⒙ [NUMBER EIGHTEEN FULL STOP]
|
||||||
|
"\u2499" => "18."
|
||||||
|
|
||||||
|
# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
|
||||||
|
"\u2485" => "(18)"
|
||||||
|
|
||||||
|
# ⑲ [CIRCLED NUMBER NINETEEN]
|
||||||
|
"\u2472" => "19"
|
||||||
|
|
||||||
|
# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
|
||||||
|
"\u24F3" => "19"
|
||||||
|
|
||||||
|
# ⒚ [NUMBER NINETEEN FULL STOP]
|
||||||
|
"\u249A" => "19."
|
||||||
|
|
||||||
|
# ⒆ [PARENTHESIZED NUMBER NINETEEN]
|
||||||
|
"\u2486" => "(19)"
|
||||||
|
|
||||||
|
# ⑳ [CIRCLED NUMBER TWENTY]
|
||||||
|
"\u2473" => "20"
|
||||||
|
|
||||||
|
# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
|
||||||
|
"\u24F4" => "20"
|
||||||
|
|
||||||
|
# ⒛ [NUMBER TWENTY FULL STOP]
|
||||||
|
"\u249B" => "20."
|
||||||
|
|
||||||
|
# ⒇ [PARENTHESIZED NUMBER TWENTY]
|
||||||
|
"\u2487" => "(20)"
|
||||||
|
|
||||||
|
# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
|
||||||
|
"\u00AB" => "\""
|
||||||
|
|
||||||
|
# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
|
||||||
|
"\u00BB" => "\""
|
||||||
|
|
||||||
|
# “ [LEFT DOUBLE QUOTATION MARK]
|
||||||
|
"\u201C" => "\""
|
||||||
|
|
||||||
|
# ” [RIGHT DOUBLE QUOTATION MARK]
|
||||||
|
"\u201D" => "\""
|
||||||
|
|
||||||
|
# „ [DOUBLE LOW-9 QUOTATION MARK]
|
||||||
|
"\u201E" => "\""
|
||||||
|
|
||||||
|
# ″ [DOUBLE PRIME]
|
||||||
|
"\u2033" => "\""
|
||||||
|
|
||||||
|
# ‶ [REVERSED DOUBLE PRIME]
|
||||||
|
"\u2036" => "\""
|
||||||
|
|
||||||
|
# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
|
||||||
|
"\u275D" => "\""
|
||||||
|
|
||||||
|
# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
|
||||||
|
"\u275E" => "\""
|
||||||
|
|
||||||
|
# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
|
||||||
|
"\u276E" => "\""
|
||||||
|
|
||||||
|
# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
|
||||||
|
"\u276F" => "\""
|
||||||
|
|
||||||
|
# ＂ [FULLWIDTH QUOTATION MARK]
|
||||||
|
"\uFF02" => "\""
|
||||||
|
|
||||||
|
# ‘ [LEFT SINGLE QUOTATION MARK]
|
||||||
|
"\u2018" => "\'"
|
||||||
|
|
||||||
|
# ’ [RIGHT SINGLE QUOTATION MARK]
|
||||||
|
"\u2019" => "\'"
|
||||||
|
|
||||||
|
# ‚ [SINGLE LOW-9 QUOTATION MARK]
|
||||||
|
"\u201A" => "\'"
|
||||||
|
|
||||||
|
# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
|
||||||
|
"\u201B" => "\'"
|
||||||
|
|
||||||
|
# ′ [PRIME]
|
||||||
|
"\u2032" => "\'"
|
||||||
|
|
||||||
|
# ‵ [REVERSED PRIME]
|
||||||
|
"\u2035" => "\'"
|
||||||
|
|
||||||
|
# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
|
||||||
|
"\u2039" => "\'"
|
||||||
|
|
||||||
|
# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
|
||||||
|
"\u203A" => "\'"
|
||||||
|
|
||||||
|
# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
|
||||||
|
"\u275B" => "\'"
|
||||||
|
|
||||||
|
# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
|
||||||
|
"\u275C" => "\'"
|
||||||
|
|
||||||
|
# ＇ [FULLWIDTH APOSTROPHE]
|
||||||
|
"\uFF07" => "\'"
|
||||||
|
|
||||||
|
# ‐ [HYPHEN]
|
||||||
|
"\u2010" => "-"
|
||||||
|
|
||||||
|
# ‑ [NON-BREAKING HYPHEN]
|
||||||
|
"\u2011" => "-"
|
||||||
|
|
||||||
|
# ‒ [FIGURE DASH]
|
||||||
|
"\u2012" => "-"
|
||||||
|
|
||||||
|
# – [EN DASH]
|
||||||
|
"\u2013" => "-"
|
||||||
|
|
||||||
|
# — [EM DASH]
|
||||||
|
"\u2014" => "-"
|
||||||
|
|
||||||
|
# ⁻ [SUPERSCRIPT MINUS]
|
||||||
|
"\u207B" => "-"
|
||||||
|
|
||||||
|
# ₋ [SUBSCRIPT MINUS]
|
||||||
|
"\u208B" => "-"
|
||||||
|
|
||||||
|
# － [FULLWIDTH HYPHEN-MINUS]
|
||||||
|
"\uFF0D" => "-"
|
||||||
|
|
||||||
|
# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
|
||||||
|
"\u2045" => "["
|
||||||
|
|
||||||
|
# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
|
||||||
|
"\u2772" => "["
|
||||||
|
|
||||||
|
# ［ [FULLWIDTH LEFT SQUARE BRACKET]
|
||||||
|
"\uFF3B" => "["
|
||||||
|
|
||||||
|
# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
|
||||||
|
"\u2046" => "]"
|
||||||
|
|
||||||
|
# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
|
||||||
|
"\u2773" => "]"
|
||||||
|
|
||||||
|
# ］ [FULLWIDTH RIGHT SQUARE BRACKET]
|
||||||
|
"\uFF3D" => "]"
|
||||||
|
|
||||||
|
# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
|
||||||
|
"\u207D" => "("
|
||||||
|
|
||||||
|
# ₍ [SUBSCRIPT LEFT PARENTHESIS]
|
||||||
|
"\u208D" => "("
|
||||||
|
|
||||||
|
# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
|
||||||
|
"\u2768" => "("
|
||||||
|
|
||||||
|
# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
|
||||||
|
"\u276A" => "("
|
||||||
|
|
||||||
|
# （ [FULLWIDTH LEFT PARENTHESIS]
|
||||||
|
"\uFF08" => "("
|
||||||
|
|
||||||
|
# ⸨ [LEFT DOUBLE PARENTHESIS]
|
||||||
|
"\u2E28" => "(("
|
||||||
|
|
||||||
|
# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
|
||||||
|
"\u207E" => ")"
|
||||||
|
|
||||||
|
# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
|
||||||
|
"\u208E" => ")"
|
||||||
|
|
||||||
|
# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
|
||||||
|
"\u2769" => ")"
|
||||||
|
|
||||||
|
# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
|
||||||
|
"\u276B" => ")"
|
||||||
|
|
||||||
|
# ） [FULLWIDTH RIGHT PARENTHESIS]
|
||||||
|
"\uFF09" => ")"
|
||||||
|
|
||||||
|
# ⸩ [RIGHT DOUBLE PARENTHESIS]
|
||||||
|
"\u2E29" => "))"
|
||||||
|
|
||||||
|
# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
|
||||||
|
"\u276C" => "<"
|
||||||
|
|
||||||
|
# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
|
||||||
|
"\u2770" => "<"
|
||||||
|
|
||||||
|
# < [FULLWIDTH LESS-THAN SIGN]
|
||||||
|
"\uFF1C" => "<"
|
||||||
|
|
||||||
|
# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
|
||||||
|
"\u276D" => ">"
|
||||||
|
|
||||||
|
# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
|
||||||
|
"\u2771" => ">"
|
||||||
|
|
||||||
|
# > [FULLWIDTH GREATER-THAN SIGN]
|
||||||
|
"\uFF1E" => ">"
|
||||||
|
|
||||||
|
# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
|
||||||
|
"\u2774" => "{"
|
||||||
|
|
||||||
|
# { [FULLWIDTH LEFT CURLY BRACKET]
|
||||||
|
"\uFF5B" => "{"
|
||||||
|
|
||||||
|
# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
|
||||||
|
"\u2775" => "}"
|
||||||
|
|
||||||
|
# } [FULLWIDTH RIGHT CURLY BRACKET]
|
||||||
|
"\uFF5D" => "}"
|
||||||
|
|
||||||
|
# ⁺ [SUPERSCRIPT PLUS SIGN]
|
||||||
|
"\u207A" => "+"
|
||||||
|
|
||||||
|
# ₊ [SUBSCRIPT PLUS SIGN]
|
||||||
|
"\u208A" => "+"
|
||||||
|
|
||||||
|
# + [FULLWIDTH PLUS SIGN]
|
||||||
|
"\uFF0B" => "+"
|
||||||
|
|
||||||
|
# ⁼ [SUPERSCRIPT EQUALS SIGN]
|
||||||
|
"\u207C" => "="
|
||||||
|
|
||||||
|
# ₌ [SUBSCRIPT EQUALS SIGN]
|
||||||
|
"\u208C" => "="
|
||||||
|
|
||||||
|
# = [FULLWIDTH EQUALS SIGN]
|
||||||
|
"\uFF1D" => "="
|
||||||
|
|
||||||
|
# ! [FULLWIDTH EXCLAMATION MARK]
|
||||||
|
"\uFF01" => "!"
|
||||||
|
|
||||||
|
# ‼ [DOUBLE EXCLAMATION MARK]
|
||||||
|
"\u203C" => "!!"
|
||||||
|
|
||||||
|
# ⁉ [EXCLAMATION QUESTION MARK]
|
||||||
|
"\u2049" => "!?"
|
||||||
|
|
||||||
|
# # [FULLWIDTH NUMBER SIGN]
|
||||||
|
"\uFF03" => "#"
|
||||||
|
|
||||||
|
# $ [FULLWIDTH DOLLAR SIGN]
|
||||||
|
"\uFF04" => "$"
|
||||||
|
|
||||||
|
# ⁒ [COMMERCIAL MINUS SIGN]
|
||||||
|
"\u2052" => "%"
|
||||||
|
|
||||||
|
# % [FULLWIDTH PERCENT SIGN]
|
||||||
|
"\uFF05" => "%"
|
||||||
|
|
||||||
|
# & [FULLWIDTH AMPERSAND]
|
||||||
|
"\uFF06" => "&"
|
||||||
|
|
||||||
|
# ⁎ [LOW ASTERISK]
|
||||||
|
"\u204E" => "*"
|
||||||
|
|
||||||
|
# * [FULLWIDTH ASTERISK]
|
||||||
|
"\uFF0A" => "*"
|
||||||
|
|
||||||
|
# , [FULLWIDTH COMMA]
|
||||||
|
"\uFF0C" => ","
|
||||||
|
|
||||||
|
# . [FULLWIDTH FULL STOP]
|
||||||
|
"\uFF0E" => "."
|
||||||
|
|
||||||
|
# ⁄ [FRACTION SLASH]
|
||||||
|
"\u2044" => "/"
|
||||||
|
|
||||||
|
# / [FULLWIDTH SOLIDUS]
|
||||||
|
"\uFF0F" => "/"
|
||||||
|
|
||||||
|
# : [FULLWIDTH COLON]
|
||||||
|
"\uFF1A" => ":"
|
||||||
|
|
||||||
|
# ⁏ [REVERSED SEMICOLON]
|
||||||
|
"\u204F" => ";"
|
||||||
|
|
||||||
|
# ; [FULLWIDTH SEMICOLON]
|
||||||
|
"\uFF1B" => ";"
|
||||||
|
|
||||||
|
# ? [FULLWIDTH QUESTION MARK]
|
||||||
|
"\uFF1F" => "?"
|
||||||
|
|
||||||
|
# ⁇ [DOUBLE QUESTION MARK]
|
||||||
|
"\u2047" => "??"
|
||||||
|
|
||||||
|
# ⁈ [QUESTION EXCLAMATION MARK]
|
||||||
|
"\u2048" => "?!"
|
||||||
|
|
||||||
|
# @ [FULLWIDTH COMMERCIAL AT]
|
||||||
|
"\uFF20" => "@"
|
||||||
|
|
||||||
|
# \ [FULLWIDTH REVERSE SOLIDUS]
|
||||||
|
"\uFF3C" => "\\"
|
||||||
|
|
||||||
|
# ‸ [CARET]
|
||||||
|
"\u2038" => "^"
|
||||||
|
|
||||||
|
# ^ [FULLWIDTH CIRCUMFLEX ACCENT]
|
||||||
|
"\uFF3E" => "^"
|
||||||
|
|
||||||
|
# _ [FULLWIDTH LOW LINE]
|
||||||
|
"\uFF3F" => "_"
|
||||||
|
|
||||||
|
# ⁓ [SWUNG DASH]
|
||||||
|
"\u2053" => "~"
|
||||||
|
|
||||||
|
# ~ [FULLWIDTH TILDE]
|
||||||
|
"\uFF5E" => "~"
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# Below is the Perl script used to generate the above mappings #
|
||||||
|
# from ASCIIFoldingFilter.java: #
|
||||||
|
################################################################
|
||||||
|
#
|
||||||
|
# #!/usr/bin/perl
|
||||||
|
#
|
||||||
|
# use warnings;
|
||||||
|
# use strict;
|
||||||
|
#
|
||||||
|
# my @source_chars = ();
|
||||||
|
# my @source_char_descriptions = ();
|
||||||
|
# my $target = '';
|
||||||
|
#
|
||||||
|
# while (<>) {
|
||||||
|
# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
|
||||||
|
# push @source_chars, $1;
|
||||||
|
# push @source_char_descriptions, $2;
|
||||||
|
# next;
|
||||||
|
# }
|
||||||
|
# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
|
||||||
|
# $target .= $1;
|
||||||
|
# next;
|
||||||
|
# }
|
||||||
|
# if (/break;/) {
|
||||||
|
# $target = "\\\"" if ($target eq '"');
|
||||||
|
# for my $source_char_num (0..$#source_chars) {
|
||||||
|
# print "# $source_char_descriptions[$source_char_num]\n";
|
||||||
|
# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
|
||||||
|
# }
|
||||||
|
# @source_chars = ();
|
||||||
|
# @source_char_descriptions = ();
|
||||||
|
# $target = '';
|
||||||
|
# }
|
||||||
|
# }
|
|
@ -0,0 +1,21 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
# Use a protected word file to protect against the stemmer reducing two
|
||||||
|
# unrelated words to the same base word.
|
||||||
|
|
||||||
|
# Some non-words that normally won't be encountered,
|
||||||
|
# just to test that they won't be stemmed.
|
||||||
|
dontstems
|
||||||
|
zwhacky
|
||||||
|
|
|
@ -0,0 +1,1823 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
For more details about configurations options that may appear in
|
||||||
|
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
||||||
|
-->
|
||||||
|
<config>
|
||||||
|
<!-- In all configuration below, a prefix of "solr." for class names
|
||||||
|
is an alias that causes solr to search appropriate packages,
|
||||||
|
including org.apache.solr.(search|update|request|core|analysis)
|
||||||
|
|
||||||
|
You may also specify a fully qualified Java classname if you
|
||||||
|
have your own custom plugins.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Controls what version of Lucene various components of Solr
|
||||||
|
adhere to. Generally, you want to use the latest version to
|
||||||
|
get all bug fixes and improvements. It is highly recommended
|
||||||
|
that you fully re-index after changing this setting as it can
|
||||||
|
affect both how text is indexed and queried.
|
||||||
|
-->
|
||||||
|
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||||
|
|
||||||
|
<!-- <lib/> directives can be used to instruct Solr to load any Jars
|
||||||
|
identified and use them to resolve any "plugins" specified in
|
||||||
|
your solrconfig.xml or schema.xml (ie: Analyzers, Request
|
||||||
|
Handlers, etc...).
|
||||||
|
|
||||||
|
All directories and paths are resolved relative to the
|
||||||
|
instanceDir.
|
||||||
|
|
||||||
|
Please note that <lib/> directives are processed in the order
|
||||||
|
that they appear in your solrconfig.xml file, and are "stacked"
|
||||||
|
on top of each other when building a ClassLoader - so if you have
|
||||||
|
plugin jars with dependencies on other jars, the "lower level"
|
||||||
|
dependency jars should be loaded first.
|
||||||
|
|
||||||
|
If a "./lib" directory exists in your instanceDir, all files
|
||||||
|
found in it are included as if you had used the following
|
||||||
|
syntax...
|
||||||
|
|
||||||
|
<lib dir="./lib" />
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- A 'dir' option by itself adds any files found in the directory
|
||||||
|
to the classpath, this is useful for including all jars in a
|
||||||
|
directory.
|
||||||
|
|
||||||
|
When a 'regex' is specified in addition to a 'dir', only the
|
||||||
|
files in that directory which completely match the regex
|
||||||
|
(anchored on both ends) will be included.
|
||||||
|
|
||||||
|
If a 'dir' option (with or without a regex) is used and nothing
|
||||||
|
is found that matches, a warning will be logged.
|
||||||
|
|
||||||
|
The examples below can be used to load some solr-contribs along
|
||||||
|
with their external dependencies.
|
||||||
|
-->
|
||||||
|
<lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
|
||||||
|
<lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||||
|
|
||||||
|
<lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
|
||||||
|
<lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
|
||||||
|
|
||||||
|
<lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
|
||||||
|
<lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
|
||||||
|
|
||||||
|
<lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
|
||||||
|
<lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
|
||||||
|
|
||||||
|
<!-- an exact 'path' can be used instead of a 'dir' to specify a
|
||||||
|
specific jar file. This will cause a serious error to be logged
|
||||||
|
if it can't be loaded.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lib path="../a-jar-that-does-not-exist.jar" />
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Data Directory
|
||||||
|
|
||||||
|
Used to specify an alternate directory to hold all index data
|
||||||
|
other than the default ./data under the Solr home. If
|
||||||
|
replication is in use, this should match the replication
|
||||||
|
configuration.
|
||||||
|
-->
|
||||||
|
<dataDir>${solr.data.dir:}</dataDir>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- The DirectoryFactory to use for indexes.
|
||||||
|
|
||||||
|
solr.StandardDirectoryFactory is filesystem
|
||||||
|
based and tries to pick the best implementation for the current
|
||||||
|
JVM and platform. solr.NRTCachingDirectoryFactory, the default,
|
||||||
|
wraps solr.StandardDirectoryFactory and caches small files in memory
|
||||||
|
for better NRT performance.
|
||||||
|
|
||||||
|
One can force a particular implementation via solr.MMapDirectoryFactory,
|
||||||
|
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
|
||||||
|
|
||||||
|
solr.RAMDirectoryFactory is memory based, not
|
||||||
|
persistent, and doesn't work with replication.
|
||||||
|
-->
|
||||||
|
<directoryFactory name="DirectoryFactory"
|
||||||
|
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||||
|
|
||||||
|
<!-- The CodecFactory for defining the format of the inverted index.
|
||||||
|
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||||
|
index format, but hooks into the schema to provide per-field customization of
|
||||||
|
the postings lists and per-document values in the fieldType element
|
||||||
|
(postingsFormat/docValuesFormat). Note that most of the alternative implementations
|
||||||
|
are experimental, so if you choose to customize the index format, it's a good
|
||||||
|
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
||||||
|
before upgrading to a newer version to avoid unnecessary reindexing.
|
||||||
|
-->
|
||||||
|
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||||
|
|
||||||
|
<!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
|
||||||
|
|
||||||
|
<schemaFactory class="ManagedIndexSchemaFactory">
|
||||||
|
<bool name="mutable">true</bool>
|
||||||
|
<str name="managedSchemaResourceName">managed-schema</str>
|
||||||
|
</schemaFactory>
|
||||||
|
|
||||||
|
When ManagedIndexSchemaFactory is specified, Solr will load the schema from
|
||||||
|
the resource named in 'managedSchemaResourceName', rather than from schema.xml.
|
||||||
|
Note that the managed schema resource CANNOT be named schema.xml. If the managed
|
||||||
|
schema does not exist, Solr will create it after reading schema.xml, then rename
|
||||||
|
'schema.xml' to 'schema.xml.bak'.
|
||||||
|
|
||||||
|
Do NOT hand edit the managed schema - external modifications will be ignored and
|
||||||
|
overwritten as a result of schema modification REST API calls.
|
||||||
|
|
||||||
|
When ManagedIndexSchemaFactory is specified with mutable = true, schema
|
||||||
|
modification REST API calls will be allowed; otherwise, error responses will be
|
||||||
|
sent back for these requests.
|
||||||
|
-->
|
||||||
|
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||||
|
|
||||||
|
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
Index Config - These settings control low-level behavior of indexing
|
||||||
|
Most example settings here show the default value, but are commented
|
||||||
|
out, to more easily see where customizations have been made.
|
||||||
|
|
||||||
|
Note: This replaces <indexDefaults> and <mainIndex> from older versions
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
||||||
|
<indexConfig>
|
||||||
|
<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
|
||||||
|
LimitTokenCountFilterFactory in your fieldType definition. E.g.
|
||||||
|
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
|
||||||
|
-->
|
||||||
|
<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
|
||||||
|
<!-- <writeLockTimeout>1000</writeLockTimeout> -->
|
||||||
|
|
||||||
|
<!-- The maximum number of simultaneous threads that may be
|
||||||
|
indexing documents at once in IndexWriter; if more than this
|
||||||
|
many threads arrive they will wait for others to finish.
|
||||||
|
Default in Solr/Lucene is 8. -->
|
||||||
|
<!-- <maxIndexingThreads>8</maxIndexingThreads> -->
|
||||||
|
|
||||||
|
<!-- Expert: Enabling compound file will use fewer files for the index,
|
||||||
|
using fewer file descriptors at the expense of performance.
|
||||||
|
Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
|
||||||
|
<!-- <useCompoundFile>false</useCompoundFile> -->
|
||||||
|
|
||||||
|
<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
|
||||||
|
indexing for buffering added documents and deletions before they are
|
||||||
|
flushed to the Directory.
|
||||||
|
maxBufferedDocs sets a limit on the number of documents buffered
|
||||||
|
before flushing.
|
||||||
|
If both ramBufferSizeMB and maxBufferedDocs are set, then
|
||||||
|
Lucene will flush based on whichever limit is hit first.
|
||||||
|
The default is 100 MB. -->
|
||||||
|
<!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
|
||||||
|
<!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
|
||||||
|
|
||||||
|
<!-- Expert: Merge Policy
|
||||||
|
The Merge Policy in Lucene controls how merging of segments is done.
|
||||||
|
The default since Solr/Lucene 3.3 is TieredMergePolicy.
|
||||||
|
The default since Lucene 2.3 was the LogByteSizeMergePolicy.
|
||||||
|
Even older versions of Lucene used LogDocMergePolicy.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
|
||||||
|
<int name="maxMergeAtOnce">10</int>
|
||||||
|
<int name="segmentsPerTier">10</int>
|
||||||
|
</mergePolicy>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Merge Factor
|
||||||
|
The merge factor controls how many segments will get merged at a time.
|
||||||
|
For TieredMergePolicy, mergeFactor is a convenience parameter which
|
||||||
|
will set both MaxMergeAtOnce and SegmentsPerTier at once.
|
||||||
|
For LogByteSizeMergePolicy, mergeFactor decides how many new segments
|
||||||
|
will be allowed before they are merged into one.
|
||||||
|
Default is 10 for both merge policies.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<mergeFactor>10</mergeFactor>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Expert: Merge Scheduler
|
||||||
|
The Merge Scheduler in Lucene controls how merges are
|
||||||
|
performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
|
||||||
|
can perform merges in the background using separate threads.
|
||||||
|
The SerialMergeScheduler (Lucene 2.2 default) does not.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- LockFactory
|
||||||
|
|
||||||
|
This option specifies which Lucene LockFactory implementation
|
||||||
|
to use.
|
||||||
|
|
||||||
|
single = SingleInstanceLockFactory - suggested for a
|
||||||
|
read-only index or when there is no possibility of
|
||||||
|
another process trying to modify the index.
|
||||||
|
native = NativeFSLockFactory - uses OS native file locking.
|
||||||
|
Do not use when multiple solr webapps in the same
|
||||||
|
JVM are attempting to share a single index.
|
||||||
|
simple = SimpleFSLockFactory - uses a plain file for locking
|
||||||
|
|
||||||
|
Defaults: 'native' is default for Solr3.6 and later, otherwise
|
||||||
|
'simple' is the default
|
||||||
|
|
||||||
|
More details on the nuances of each LockFactory...
|
||||||
|
http://wiki.apache.org/lucene-java/AvailableLockFactories
|
||||||
|
-->
|
||||||
|
<lockType>${solr.lock.type:native}</lockType>
|
||||||
|
|
||||||
|
<!-- Unlock On Startup
|
||||||
|
|
||||||
|
If true, unlock any held write or commit locks on startup.
|
||||||
|
This defeats the locking mechanism that allows multiple
|
||||||
|
processes to safely access a lucene index, and should be used
|
||||||
|
with care. Default is "false".
|
||||||
|
|
||||||
|
This is not needed if lock type is 'single'
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<unlockOnStartup>false</unlockOnStartup>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Expert: Controls how often Lucene loads terms into memory
|
||||||
|
Default is 128 and is likely good for most everyone.
|
||||||
|
-->
|
||||||
|
<!-- <termIndexInterval>128</termIndexInterval> -->
|
||||||
|
|
||||||
|
<!-- If true, IndexReaders will be reopened (often more efficient)
|
||||||
|
instead of closed and then opened. Default: true
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<reopenReaders>true</reopenReaders>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Commit Deletion Policy
|
||||||
|
Custom deletion policies can be specified here. The class must
|
||||||
|
implement org.apache.lucene.index.IndexDeletionPolicy.
|
||||||
|
|
||||||
|
The default Solr IndexDeletionPolicy implementation supports
|
||||||
|
deleting index commit points on number of commits, age of
|
||||||
|
commit point and optimized status.
|
||||||
|
|
||||||
|
The latest commit point should always be preserved regardless
|
||||||
|
of the criteria.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<deletionPolicy class="solr.SolrDeletionPolicy">
|
||||||
|
-->
|
||||||
|
<!-- The number of commit points to be kept -->
|
||||||
|
<!-- <str name="maxCommitsToKeep">1</str> -->
|
||||||
|
<!-- The number of optimized commit points to be kept -->
|
||||||
|
<!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
|
||||||
|
<!--
|
||||||
|
Delete all commit points once they have reached the given age.
|
||||||
|
Supports DateMathParser syntax e.g.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<str name="maxCommitAge">30MINUTES</str>
|
||||||
|
<str name="maxCommitAge">1DAY</str>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
</deletionPolicy>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Lucene Infostream
|
||||||
|
|
||||||
|
To aid in advanced debugging, Lucene provides an "InfoStream"
|
||||||
|
of detailed information when indexing.
|
||||||
|
|
||||||
|
Setting the value to true will instruct the underlying Lucene
|
||||||
|
IndexWriter to write its info stream to solr's log. By default,
|
||||||
|
this is enabled here, and controlled through log4j.properties.
|
||||||
|
-->
|
||||||
|
<infoStream>true</infoStream>
|
||||||
|
</indexConfig>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- JMX
|
||||||
|
|
||||||
|
This example enables JMX if and only if an existing MBeanServer
|
||||||
|
is found, use this if you want to configure JMX through JVM
|
||||||
|
parameters. Remove this to disable exposing Solr configuration
|
||||||
|
and statistics to JMX.
|
||||||
|
|
||||||
|
For more details see http://wiki.apache.org/solr/SolrJmx
|
||||||
|
-->
|
||||||
|
<jmx />
|
||||||
|
<!-- If you want to connect to a particular server, specify the
|
||||||
|
agentId
|
||||||
|
-->
|
||||||
|
<!-- <jmx agentId="myAgent" /> -->
|
||||||
|
<!-- If you want to start a new MBeanServer, specify the serviceUrl -->
|
||||||
|
<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- The default high-performance update handler -->
|
||||||
|
<updateHandler class="solr.DirectUpdateHandler2">
|
||||||
|
|
||||||
|
<!-- Enables a transaction log, used for real-time get, durability, and
|
||||||
|
solr cloud replica recovery. The log can grow as big as
|
||||||
|
uncommitted changes to the index, so use of a hard autoCommit
|
||||||
|
is recommended (see below).
|
||||||
|
"dir" - the target directory for transaction logs, defaults to the
|
||||||
|
solr data directory. -->
|
||||||
|
<updateLog>
|
||||||
|
<str name="dir">${solr.ulog.dir:}</str>
|
||||||
|
</updateLog>
|
||||||
|
|
||||||
|
<!-- AutoCommit
|
||||||
|
|
||||||
|
Perform a hard commit automatically under certain conditions.
|
||||||
|
Instead of enabling autoCommit, consider using "commitWithin"
|
||||||
|
when adding documents.
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/UpdateXmlMessages
|
||||||
|
|
||||||
|
maxDocs - Maximum number of documents to add since the last
|
||||||
|
commit before automatically triggering a new commit.
|
||||||
|
|
||||||
|
maxTime - Maximum amount of time in ms that is allowed to pass
|
||||||
|
since a document was added before automatically
|
||||||
|
triggering a new commit.
|
||||||
|
openSearcher - if false, the commit causes recent index changes
|
||||||
|
to be flushed to stable storage, but does not cause a new
|
||||||
|
searcher to be opened to make those changes visible.
|
||||||
|
|
||||||
|
If the updateLog is enabled, then it's highly recommended to
|
||||||
|
have some sort of hard autoCommit to limit the log size.
|
||||||
|
-->
|
||||||
|
<autoCommit>
|
||||||
|
<maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
|
||||||
|
<openSearcher>false</openSearcher>
|
||||||
|
</autoCommit>
|
||||||
|
|
||||||
|
<!-- softAutoCommit is like autoCommit except it causes a
|
||||||
|
'soft' commit which only ensures that changes are visible
|
||||||
|
but does not ensure that data is synced to disk. This is
|
||||||
|
faster and more near-realtime friendly than a hard commit.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<autoSoftCommit>
|
||||||
|
<maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
|
||||||
|
</autoSoftCommit>
|
||||||
|
|
||||||
|
<!-- Update Related Event Listeners
|
||||||
|
|
||||||
|
Various IndexWriter related events can trigger Listeners to
|
||||||
|
take actions.
|
||||||
|
|
||||||
|
postCommit - fired after every commit or optimize command
|
||||||
|
postOptimize - fired after every optimize command
|
||||||
|
-->
|
||||||
|
<!-- The RunExecutableListener executes an external command from a
|
||||||
|
hook such as postCommit or postOptimize.
|
||||||
|
|
||||||
|
exe - the name of the executable to run
|
||||||
|
dir - dir to use as the current working directory. (default=".")
|
||||||
|
wait - the calling thread waits until the executable returns.
|
||||||
|
(default="true")
|
||||||
|
args - the arguments to pass to the program. (default is none)
|
||||||
|
env - environment variables to set. (default is none)
|
||||||
|
-->
|
||||||
|
<!-- This example shows how RunExecutableListener could be used
|
||||||
|
with the script based replication...
|
||||||
|
http://wiki.apache.org/solr/CollectionDistribution
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<listener event="postCommit" class="solr.RunExecutableListener">
|
||||||
|
<str name="exe">solr/bin/snapshooter</str>
|
||||||
|
<str name="dir">.</str>
|
||||||
|
<bool name="wait">true</bool>
|
||||||
|
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
|
||||||
|
<arr name="env"> <str>MYVAR=val1</str> </arr>
|
||||||
|
</listener>
|
||||||
|
-->
|
||||||
|
|
||||||
|
</updateHandler>
|
||||||
|
|
||||||
|
<!-- IndexReaderFactory
|
||||||
|
|
||||||
|
Use the following format to specify a custom IndexReaderFactory,
|
||||||
|
which allows for alternate IndexReader implementations.
|
||||||
|
|
||||||
|
** Experimental Feature **
|
||||||
|
|
||||||
|
Please note - Using a custom IndexReaderFactory may prevent
|
||||||
|
certain other features from working. The API to
|
||||||
|
IndexReaderFactory may change without warning or may even be
|
||||||
|
removed from future releases if the problems cannot be
|
||||||
|
resolved.
|
||||||
|
|
||||||
|
|
||||||
|
** Features that may not work with custom IndexReaderFactory **
|
||||||
|
|
||||||
|
The ReplicationHandler assumes a disk-resident index. Using a
|
||||||
|
custom IndexReader implementation may cause incompatibility
|
||||||
|
with ReplicationHandler and may cause replication to not work
|
||||||
|
correctly. See SOLR-1366 for details.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<indexReaderFactory name="IndexReaderFactory" class="package.class">
|
||||||
|
<str name="someArg">Some Value</str>
|
||||||
|
</indexReaderFactory >
|
||||||
|
-->
|
||||||
|
<!-- By explicitly declaring the Factory, the termIndexDivisor can
|
||||||
|
be specified.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<indexReaderFactory name="IndexReaderFactory"
|
||||||
|
class="solr.StandardIndexReaderFactory">
|
||||||
|
<int name="setTermIndexDivisor">12</int>
|
||||||
|
</indexReaderFactory >
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
Query section - these settings control query time things like caches
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
||||||
|
<query>
|
||||||
|
<!-- Max Boolean Clauses
|
||||||
|
|
||||||
|
Maximum number of clauses in each BooleanQuery, an exception
|
||||||
|
is thrown if exceeded.
|
||||||
|
|
||||||
|
** WARNING **
|
||||||
|
|
||||||
|
This option actually modifies a global Lucene property that
|
||||||
|
will affect all SolrCores. If multiple solrconfig.xml files
|
||||||
|
disagree on this property, the value at any given moment will
|
||||||
|
be based on the last SolrCore to be initialized.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Solr Internal Query Caches
|
||||||
|
|
||||||
|
There are two implementations of cache available for Solr,
|
||||||
|
LRUCache, based on a synchronized LinkedHashMap, and
|
||||||
|
FastLRUCache, based on a ConcurrentHashMap.
|
||||||
|
|
||||||
|
FastLRUCache has faster gets and slower puts in single
|
||||||
|
threaded operation and thus is generally faster than LRUCache
|
||||||
|
when the hit ratio of the cache is high (> 75%), and may be
|
||||||
|
faster under other scenarios on multi-cpu systems.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Filter Cache
|
||||||
|
|
||||||
|
Cache used by SolrIndexSearcher for filters (DocSets),
|
||||||
|
unordered sets of *all* documents that match a query. When a
|
||||||
|
new searcher is opened, its caches may be prepopulated or
|
||||||
|
"autowarmed" using data from caches in the old searcher.
|
||||||
|
autowarmCount is the number of items to prepopulate. For
|
||||||
|
LRUCache, the autowarmed items will be the most recently
|
||||||
|
accessed items.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
class - the SolrCache implementation to use
|
||||||
|
(LRUCache or FastLRUCache)
|
||||||
|
size - the maximum number of entries in the cache
|
||||||
|
initialSize - the initial capacity (number of entries) of
|
||||||
|
the cache. (see java.util.HashMap)
|
||||||
|
autowarmCount - the number of entries to prepopulate from
|
||||||
|
an old cache.
|
||||||
|
-->
|
||||||
|
<filterCache class="solr.FastLRUCache"
|
||||||
|
size="512"
|
||||||
|
initialSize="512"
|
||||||
|
autowarmCount="0"/>
|
||||||
|
|
||||||
|
<!-- Query Result Cache
|
||||||
|
|
||||||
|
Caches results of searches - ordered lists of document ids
|
||||||
|
(DocList) based on a query, a sort, and the range of documents requested.
|
||||||
|
-->
|
||||||
|
<queryResultCache class="solr.LRUCache"
|
||||||
|
size="512"
|
||||||
|
initialSize="512"
|
||||||
|
autowarmCount="0"/>
|
||||||
|
|
||||||
|
<!-- Document Cache
|
||||||
|
|
||||||
|
Caches Lucene Document objects (the stored fields for each
|
||||||
|
document). Since Lucene internal document ids are transient,
|
||||||
|
this cache will not be autowarmed.
|
||||||
|
-->
|
||||||
|
<documentCache class="solr.LRUCache"
|
||||||
|
size="512"
|
||||||
|
initialSize="512"
|
||||||
|
autowarmCount="0"/>
|
||||||
|
|
||||||
|
<!-- Field Value Cache
|
||||||
|
|
||||||
|
Cache used to hold field values that are quickly accessible
|
||||||
|
by document id. The fieldValueCache is created by default
|
||||||
|
even if not configured here.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<fieldValueCache class="solr.FastLRUCache"
|
||||||
|
size="512"
|
||||||
|
autowarmCount="128"
|
||||||
|
showItems="32" />
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Custom Cache
|
||||||
|
|
||||||
|
Example of a generic cache. These caches may be accessed by
|
||||||
|
name through SolrIndexSearcher.getCache(), cacheLookup(), and
|
||||||
|
cacheInsert(). The purpose is to enable easy caching of
|
||||||
|
user/application level data. The regenerator argument should
|
||||||
|
be specified as an implementation of solr.CacheRegenerator
|
||||||
|
if autowarming is desired.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<cache name="myUserCache"
|
||||||
|
class="solr.LRUCache"
|
||||||
|
size="4096"
|
||||||
|
initialSize="1024"
|
||||||
|
autowarmCount="1024"
|
||||||
|
regenerator="com.mycompany.MyRegenerator"
|
||||||
|
/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Lazy Field Loading
|
||||||
|
|
||||||
|
If true, stored fields that are not requested will be loaded
|
||||||
|
lazily. This can result in a significant speed improvement
|
||||||
|
if the usual case is to not load all stored fields,
|
||||||
|
especially if the skipped fields are large compressed text
|
||||||
|
fields.
|
||||||
|
-->
|
||||||
|
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||||
|
|
||||||
|
<!-- Use Filter For Sorted Query
|
||||||
|
|
||||||
|
A possible optimization that attempts to use a filter to
|
||||||
|
satisfy a search. If the requested sort does not include
|
||||||
|
score, then the filterCache will be checked for a filter
|
||||||
|
matching the query. If found, the filter will be used as the
|
||||||
|
source of document ids, and then the sort will be applied to
|
||||||
|
that.
|
||||||
|
|
||||||
|
For most situations, this will not be useful unless you
|
||||||
|
frequently get the same search repeatedly with different sort
|
||||||
|
options, and none of them ever use "score"
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Result Window Size
|
||||||
|
|
||||||
|
An optimization for use with the queryResultCache. When a search
|
||||||
|
is requested, a superset of the requested number of document ids
|
||||||
|
are collected. For example, if a search for a particular query
|
||||||
|
requests matching documents 10 through 19, and queryResultWindowSize is 50,
|
||||||
|
then documents 0 through 49 will be collected and cached. Any further
|
||||||
|
requests in that range can be satisfied via the cache.
|
||||||
|
-->
|
||||||
|
<queryResultWindowSize>20</queryResultWindowSize>
|
||||||
|
|
||||||
|
<!-- Maximum number of documents to cache for any entry in the
|
||||||
|
queryResultCache.
|
||||||
|
-->
|
||||||
|
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||||
|
|
||||||
|
<!-- Query Related Event Listeners
|
||||||
|
|
||||||
|
Various IndexSearcher related events can trigger Listeners to
|
||||||
|
take actions.
|
||||||
|
|
||||||
|
newSearcher - fired whenever a new searcher is being prepared
|
||||||
|
and there is a current searcher handling requests (aka
|
||||||
|
registered). It can be used to prime certain caches to
|
||||||
|
prevent long request times for certain requests.
|
||||||
|
|
||||||
|
firstSearcher - fired whenever a new searcher is being
|
||||||
|
prepared but there is no current registered searcher to handle
|
||||||
|
requests or to gain autowarming data from.
|
||||||
|
|
||||||
|
|
||||||
|
-->
|
||||||
|
<!-- QuerySenderListener takes an array of NamedList and executes a
|
||||||
|
local query request for each NamedList in sequence.
|
||||||
|
-->
|
||||||
|
<listener event="newSearcher" class="solr.QuerySenderListener">
|
||||||
|
<arr name="queries">
|
||||||
|
<!--
|
||||||
|
<lst><str name="q">solr</str><str name="sort">price asc</str></lst>
|
||||||
|
<lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
|
||||||
|
-->
|
||||||
|
</arr>
|
||||||
|
</listener>
|
||||||
|
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
||||||
|
<arr name="queries">
|
||||||
|
<lst>
|
||||||
|
<str name="q">static firstSearcher warming in solrconfig.xml</str>
|
||||||
|
</lst>
|
||||||
|
</arr>
|
||||||
|
</listener>
|
||||||
|
|
||||||
|
<!-- Use Cold Searcher
|
||||||
|
|
||||||
|
If a search request comes in and there is no current
|
||||||
|
registered searcher, then immediately register the still
|
||||||
|
warming searcher and use it. If "false" then all requests
|
||||||
|
will block until the first searcher is done warming.
|
||||||
|
-->
|
||||||
|
<useColdSearcher>false</useColdSearcher>
|
||||||
|
|
||||||
|
<!-- Max Warming Searchers
|
||||||
|
|
||||||
|
Maximum number of searchers that may be warming in the
|
||||||
|
background concurrently. An error is returned if this limit
|
||||||
|
is exceeded.
|
||||||
|
|
||||||
|
Recommended values are 1-2 for read-only slaves, higher for
|
||||||
|
masters w/o cache warming.
|
||||||
|
-->
|
||||||
|
<maxWarmingSearchers>2</maxWarmingSearchers>
|
||||||
|
|
||||||
|
</query>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Request Dispatcher
|
||||||
|
|
||||||
|
This section contains instructions for how the SolrDispatchFilter
|
||||||
|
should behave when processing requests for this SolrCore.
|
||||||
|
|
||||||
|
handleSelect is a legacy option that affects the behavior of requests
|
||||||
|
such as /select?qt=XXX
|
||||||
|
|
||||||
|
handleSelect="true" will cause the SolrDispatchFilter to process
|
||||||
|
the request and dispatch the query to a handler specified by the
|
||||||
|
"qt" param, assuming "/select" isn't already registered.
|
||||||
|
|
||||||
|
handleSelect="false" will cause the SolrDispatchFilter to
|
||||||
|
ignore "/select" requests, resulting in a 404 unless a handler
|
||||||
|
is explicitly registered with the name "/select"
|
||||||
|
|
||||||
|
handleSelect="true" is not recommended for new users, but is the default
|
||||||
|
for backwards compatibility
|
||||||
|
-->
|
||||||
|
<requestDispatcher handleSelect="false" >
|
||||||
|
<!-- Request Parsing
|
||||||
|
|
||||||
|
These settings indicate how Solr Requests may be parsed, and
|
||||||
|
what restrictions may be placed on the ContentStreams from
|
||||||
|
those requests
|
||||||
|
|
||||||
|
enableRemoteStreaming - enables use of the stream.file
|
||||||
|
and stream.url parameters for specifying remote streams.
|
||||||
|
|
||||||
|
multipartUploadLimitInKB - specifies the max size (in KiB) of
|
||||||
|
Multipart File Uploads that Solr will allow in a Request.
|
||||||
|
|
||||||
|
formdataUploadLimitInKB - specifies the max size (in KiB) of
|
||||||
|
form data (application/x-www-form-urlencoded) sent via
|
||||||
|
POST. You can use POST to pass request parameters not
|
||||||
|
fitting into the URL.
|
||||||
|
|
||||||
|
addHttpRequestToContext - if set to true, it will instruct
|
||||||
|
the requestParsers to include the original HttpServletRequest
|
||||||
|
object in the context map of the SolrQueryRequest under the
|
||||||
|
key "httpRequest". It will not be used by any of the existing
|
||||||
|
Solr components, but may be useful when developing custom
|
||||||
|
plugins.
|
||||||
|
|
||||||
|
*** WARNING ***
|
||||||
|
The settings below authorize Solr to fetch remote files. You
|
||||||
|
should make sure your system has some authentication before
|
||||||
|
using enableRemoteStreaming="true"
|
||||||
|
|
||||||
|
-->
|
||||||
|
<requestParsers enableRemoteStreaming="true"
|
||||||
|
multipartUploadLimitInKB="2048000"
|
||||||
|
formdataUploadLimitInKB="2048"
|
||||||
|
addHttpRequestToContext="false"/>
|
||||||
|
|
||||||
|
<!-- HTTP Caching
|
||||||
|
|
||||||
|
Set HTTP caching related parameters (for proxy caches and clients).
|
||||||
|
|
||||||
|
The options below instruct Solr not to output any HTTP Caching
|
||||||
|
related headers
|
||||||
|
-->
|
||||||
|
<httpCaching never304="true" />
|
||||||
|
<!-- If you include a <cacheControl> directive, it will be used to
|
||||||
|
generate a Cache-Control header (as well as an Expires header
|
||||||
|
if the value contains "max-age=")
|
||||||
|
|
||||||
|
By default, no Cache-Control header is generated.
|
||||||
|
|
||||||
|
You can use the <cacheControl> option even if you have set
|
||||||
|
never304="true"
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<httpCaching never304="true" >
|
||||||
|
<cacheControl>max-age=30, public</cacheControl>
|
||||||
|
</httpCaching>
|
||||||
|
-->
|
||||||
|
<!-- To enable Solr to respond with automatically generated HTTP
|
||||||
|
Caching headers, and to respond to Cache Validation requests
|
||||||
|
correctly, set the value of never304="false"
|
||||||
|
|
||||||
|
This will cause Solr to generate Last-Modified and ETag
|
||||||
|
headers based on the properties of the Index.
|
||||||
|
|
||||||
|
The following options can also be specified to affect the
|
||||||
|
values of these headers...
|
||||||
|
|
||||||
|
lastModFrom - the default value is "openTime" which means the
|
||||||
|
Last-Modified value (and validation against If-Modified-Since
|
||||||
|
requests) will all be relative to when the current Searcher
|
||||||
|
was opened. You can change it to lastModFrom="dirLastMod" if
|
||||||
|
you want the value to exactly correspond to when the physical
|
||||||
|
index was last modified.
|
||||||
|
|
||||||
|
etagSeed="..." is an option you can change to force the ETag
|
||||||
|
header (and validation against If-None-Match requests) to be
|
||||||
|
different even if the index has not changed (ie: when making
|
||||||
|
significant changes to your config file)
|
||||||
|
|
||||||
|
(lastModFrom and etagSeed are both ignored if you use
|
||||||
|
the never304="true" option)
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<httpCaching lastModFrom="openTime"
|
||||||
|
etagSeed="Solr">
|
||||||
|
<cacheControl>max-age=30, public</cacheControl>
|
||||||
|
</httpCaching>
|
||||||
|
-->
|
||||||
|
</requestDispatcher>
|
||||||
|
|
||||||
|
<!-- Request Handlers
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/SolrRequestHandler
|
||||||
|
|
||||||
|
Incoming queries will be dispatched to a specific handler by name
|
||||||
|
based on the path specified in the request.
|
||||||
|
|
||||||
|
Legacy behavior: If the request path uses "/select" but no Request
|
||||||
|
Handler has that name, and if handleSelect="true" has been specified in
|
||||||
|
the requestDispatcher, then the Request Handler is dispatched based on
|
||||||
|
the qt parameter. Handlers without a leading '/' are accessed this way
|
||||||
|
like so: http://host/app/[core/]select?qt=name If no qt is
|
||||||
|
given, then the requestHandler that declares default="true" will be
|
||||||
|
used or the one named "standard".
|
||||||
|
|
||||||
|
If a Request Handler is declared with startup="lazy", then it will
|
||||||
|
not be initialized until the first request that uses it.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<!-- SearchHandler
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/SearchHandler
|
||||||
|
|
||||||
|
For processing Search Queries, the primary Request Handler
|
||||||
|
provided with Solr is "SearchHandler". It delegates to a sequence
|
||||||
|
of SearchComponents (see below) and supports distributed
|
||||||
|
queries across multiple shards
|
||||||
|
-->
|
||||||
|
<requestHandler name="/select" class="solr.SearchHandler">
|
||||||
|
<!-- default values for query parameters can be specified, these
|
||||||
|
will be overridden by parameters in the request
|
||||||
|
-->
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
<int name="rows">10</int>
|
||||||
|
<str name="df">_text</str>
|
||||||
|
</lst>
|
||||||
|
<!-- In addition to defaults, "appends" params can be specified
|
||||||
|
to identify values which should be appended to the list of
|
||||||
|
multi-val params from the query (or the existing "defaults").
|
||||||
|
-->
|
||||||
|
<!-- In this example, the param "fq=instock:true" would be appended to
|
||||||
|
any query time fq params the user may specify, as a mechanism for
|
||||||
|
partitioning the index, independent of any user selected filtering
|
||||||
|
that may also be desired (perhaps as a result of faceted searching).
|
||||||
|
|
||||||
|
NOTE: there is *absolutely* nothing a client can do to prevent these
|
||||||
|
"appends" values from being used, so don't use this mechanism
|
||||||
|
unless you are sure you always want it.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="appends">
|
||||||
|
<str name="fq">inStock:true</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
<!-- "invariants" are a way of letting the Solr maintainer lock down
|
||||||
|
the options available to Solr clients. Any params values
|
||||||
|
specified here are used regardless of what values may be specified
|
||||||
|
in either the query, the "defaults", or the "appends" params.
|
||||||
|
|
||||||
|
In this example, the facet.field and facet.query params would
|
||||||
|
be fixed, limiting the facets clients can use. Faceting is
|
||||||
|
not turned on by default - but if the client does specify
|
||||||
|
facet=true in the request, these are the only facets they
|
||||||
|
will be able to see counts for; regardless of what other
|
||||||
|
facet.field or facet.query params they may specify.
|
||||||
|
|
||||||
|
NOTE: there is *absolutely* nothing a client can do to prevent these
|
||||||
|
"invariants" values from being used, so don't use this mechanism
|
||||||
|
unless you are sure you always want it.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="invariants">
|
||||||
|
<str name="facet.field">cat</str>
|
||||||
|
<str name="facet.field">manu_exact</str>
|
||||||
|
<str name="facet.query">price:[* TO 500]</str>
|
||||||
|
<str name="facet.query">price:[500 TO *]</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
<!-- If the default list of SearchComponents is not desired, that
|
||||||
|
list can either be overridden completely, or components can be
|
||||||
|
prepended or appended to the default list. (see below)
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<arr name="components">
|
||||||
|
<str>nameOfCustomComponent1</str>
|
||||||
|
<str>nameOfCustomComponent2</str>
|
||||||
|
</arr>
|
||||||
|
-->
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- A request handler that returns indented JSON by default -->
|
||||||
|
<requestHandler name="/query" class="solr.SearchHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
<str name="wt">json</str>
|
||||||
|
<str name="indent">true</str>
|
||||||
|
<str name="df">_text</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- realtime get handler, guaranteed to return the latest stored fields of
|
||||||
|
any document, without the need to commit or open a new searcher. The
|
||||||
|
current implementation relies on the updateLog feature being enabled. -->
|
||||||
|
<requestHandler name="/get" class="solr.RealTimeGetHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="omitHeader">true</str>
|
||||||
|
<str name="wt">json</str>
|
||||||
|
<str name="indent">true</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- A Robust Example
|
||||||
|
|
||||||
|
This example SearchHandler declaration shows off usage of the
|
||||||
|
SearchHandler with many defaults declared
|
||||||
|
|
||||||
|
Note that multiple instances of the same Request Handler
|
||||||
|
(SearchHandler) can be registered multiple times with different
|
||||||
|
names (and different init parameters)
|
||||||
|
-->
|
||||||
|
<requestHandler name="/browse" class="solr.SearchHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
|
||||||
|
<!-- VelocityResponseWriter settings -->
|
||||||
|
<str name="wt">velocity</str>
|
||||||
|
<str name="v.template">browse</str>
|
||||||
|
<str name="v.layout">layout</str>
|
||||||
|
<str name="title">Solritas</str>
|
||||||
|
|
||||||
|
<!-- Query settings -->
|
||||||
|
<str name="defType">edismax</str>
|
||||||
|
<str name="qf">
|
||||||
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
|
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
|
||||||
|
</str>
|
||||||
|
<str name="df">_text</str>
|
||||||
|
<str name="mm">100%</str>
|
||||||
|
<str name="q.alt">*:*</str>
|
||||||
|
<str name="rows">10</str>
|
||||||
|
<str name="fl">*,score</str>
|
||||||
|
|
||||||
|
<str name="mlt.qf">
|
||||||
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
|
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
|
||||||
|
</str>
|
||||||
|
<str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
|
||||||
|
<int name="mlt.count">3</int>
|
||||||
|
|
||||||
|
<!-- Faceting defaults -->
|
||||||
|
<str name="facet">on</str>
|
||||||
|
<str name="facet.field">cat</str>
|
||||||
|
<str name="facet.field">manu_exact</str>
|
||||||
|
<str name="facet.field">content_type</str>
|
||||||
|
<str name="facet.field">author_s</str>
|
||||||
|
<str name="facet.query">ipod</str>
|
||||||
|
<str name="facet.query">GB</str>
|
||||||
|
<str name="facet.mincount">1</str>
|
||||||
|
<str name="facet.pivot">cat,inStock</str>
|
||||||
|
<str name="facet.range.other">after</str>
|
||||||
|
<str name="facet.range">price</str>
|
||||||
|
<int name="f.price.facet.range.start">0</int>
|
||||||
|
<int name="f.price.facet.range.end">600</int>
|
||||||
|
<int name="f.price.facet.range.gap">50</int>
|
||||||
|
<str name="facet.range">popularity</str>
|
||||||
|
<int name="f.popularity.facet.range.start">0</int>
|
||||||
|
<int name="f.popularity.facet.range.end">10</int>
|
||||||
|
<int name="f.popularity.facet.range.gap">3</int>
|
||||||
|
<str name="facet.range">manufacturedate_dt</str>
|
||||||
|
<str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
|
||||||
|
<str name="f.manufacturedate_dt.facet.range.end">NOW</str>
|
||||||
|
<str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
|
||||||
|
<str name="f.manufacturedate_dt.facet.range.other">before</str>
|
||||||
|
<str name="f.manufacturedate_dt.facet.range.other">after</str>
|
||||||
|
|
||||||
|
<!-- Highlighting defaults -->
|
||||||
|
<str name="hl">on</str>
|
||||||
|
<str name="hl.fl">content features title name</str>
|
||||||
|
<str name="hl.encoder">html</str>
|
||||||
|
<str name="hl.simple.pre">&lt;b&gt;</str>
|
||||||
|
<str name="hl.simple.post">&lt;/b&gt;</str>
|
||||||
|
<str name="f.title.hl.fragsize">0</str>
|
||||||
|
<str name="f.title.hl.alternateField">title</str>
|
||||||
|
<str name="f.name.hl.fragsize">0</str>
|
||||||
|
<str name="f.name.hl.alternateField">name</str>
|
||||||
|
<str name="f.content.hl.snippets">3</str>
|
||||||
|
<str name="f.content.hl.fragsize">200</str>
|
||||||
|
<str name="f.content.hl.alternateField">content</str>
|
||||||
|
<str name="f.content.hl.maxAlternateFieldLength">750</str>
|
||||||
|
|
||||||
|
<!-- Spell checking defaults -->
|
||||||
|
<str name="spellcheck">on</str>
|
||||||
|
<str name="spellcheck.extendedResults">false</str>
|
||||||
|
<str name="spellcheck.count">5</str>
|
||||||
|
<str name="spellcheck.alternativeTermCount">2</str>
|
||||||
|
<str name="spellcheck.maxResultsForSuggest">5</str>
|
||||||
|
<str name="spellcheck.collate">true</str>
|
||||||
|
<str name="spellcheck.collateExtendedResults">true</str>
|
||||||
|
<str name="spellcheck.maxCollationTries">5</str>
|
||||||
|
<str name="spellcheck.maxCollations">3</str>
|
||||||
|
</lst>
|
||||||
|
|
||||||
|
<!-- append spellchecking to our list of components -->
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>spellcheck</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Update Request Handler.
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/UpdateXmlMessages
|
||||||
|
|
||||||
|
The canonical Request Handler for Modifying the Index through
|
||||||
|
commands specified using XML, JSON, CSV, or JAVABIN
|
||||||
|
|
||||||
|
Note: Since solr1.1 requestHandlers require a valid content
|
||||||
|
type header if posted in the body. For example, curl now
|
||||||
|
requires: -H 'Content-type:text/xml; charset=utf-8'
|
||||||
|
|
||||||
|
To override the request content type and force a specific
|
||||||
|
Content-type, use the request parameter:
|
||||||
|
?update.contentType=text/csv
|
||||||
|
|
||||||
|
This handler will pick a response format to match the input
|
||||||
|
if the 'wt' parameter is not explicit
|
||||||
|
-->
|
||||||
|
<requestHandler name="/update" class="solr.UpdateRequestHandler">
|
||||||
|
<!-- See below for information on defining
|
||||||
|
updateRequestProcessorChains that can be used by name
|
||||||
|
on each Update Request
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="update.chain">dedupe</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- for back compat with clients using /update/json and /update/csv -->
|
||||||
|
<requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="stream.contentType">application/json</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
<requestHandler name="/update/csv" class="solr.CSVRequestHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="stream.contentType">application/csv</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Solr Cell Update Request Handler
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/ExtractingRequestHandler
|
||||||
|
|
||||||
|
-->
|
||||||
|
<requestHandler name="/update/extract"
|
||||||
|
startup="lazy"
|
||||||
|
class="solr.extraction.ExtractingRequestHandler" >
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="lowernames">true</str>
|
||||||
|
<str name="uprefix">ignored_</str>
|
||||||
|
|
||||||
|
<!-- capture link hrefs but ignore div attributes -->
|
||||||
|
<str name="captureAttr">true</str>
|
||||||
|
<str name="fmap.a">links</str>
|
||||||
|
<str name="fmap.div">ignored_</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Field Analysis Request Handler
|
||||||
|
|
||||||
|
RequestHandler that provides much the same functionality as
|
||||||
|
analysis.jsp. Provides the ability to specify multiple field
|
||||||
|
types and field names in the same request and outputs
|
||||||
|
index-time and query-time analysis for each of them.
|
||||||
|
|
||||||
|
Request parameters are:
|
||||||
|
analysis.fieldname - field name whose analyzers are to be used
|
||||||
|
|
||||||
|
analysis.fieldtype - field type whose analyzers are to be used
|
||||||
|
analysis.fieldvalue - text for index-time analysis
|
||||||
|
q (or analysis.q) - text for query time analysis
|
||||||
|
analysis.showmatch (true|false) - When set to true and when
|
||||||
|
query analysis is performed, the produced tokens of the
|
||||||
|
field value analysis will be marked as "matched" for every
|
||||||
|
token that is produced by the query analysis
|
||||||
|
-->
|
||||||
|
<requestHandler name="/analysis/field"
|
||||||
|
startup="lazy"
|
||||||
|
class="solr.FieldAnalysisRequestHandler" />
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Document Analysis Handler
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/AnalysisRequestHandler
|
||||||
|
|
||||||
|
An analysis handler that provides a breakdown of the analysis
|
||||||
|
process of provided documents. This handler expects a (single)
|
||||||
|
content stream with the following format:
|
||||||
|
|
||||||
|
<docs>
|
||||||
|
<doc>
|
||||||
|
<field name="id">1</field>
|
||||||
|
<field name="name">The Name</field>
|
||||||
|
<field name="text">The Text Value</field>
|
||||||
|
</doc>
|
||||||
|
<doc>...</doc>
|
||||||
|
<doc>...</doc>
|
||||||
|
...
|
||||||
|
</docs>
|
||||||
|
|
||||||
|
Note: Each document must contain a field which serves as the
|
||||||
|
unique key. This key is used in the returned response to associate
|
||||||
|
an analysis breakdown to the analyzed document.
|
||||||
|
|
||||||
|
Like the FieldAnalysisRequestHandler, this handler also supports
|
||||||
|
query analysis by sending either an "analysis.query" or "q"
|
||||||
|
request parameter that holds the query text to be analyzed. It
|
||||||
|
also supports the "analysis.showmatch" parameter which when set to
|
||||||
|
true, all field tokens that match the query tokens will be marked
|
||||||
|
as a "match".
|
||||||
|
-->
|
||||||
|
<requestHandler name="/analysis/document"
|
||||||
|
class="solr.DocumentAnalysisRequestHandler"
|
||||||
|
startup="lazy" />
|
||||||
|
|
||||||
|
<!-- Admin Handlers
|
||||||
|
|
||||||
|
Admin Handlers - This will register all the standard admin
|
||||||
|
RequestHandlers.
|
||||||
|
-->
|
||||||
|
<requestHandler name="/admin/"
|
||||||
|
class="solr.admin.AdminHandlers" />
|
||||||
|
<!-- This single handler is equivalent to the following... -->
|
||||||
|
<!--
|
||||||
|
<requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
|
||||||
|
<requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
|
||||||
|
<requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
|
||||||
|
<requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
|
||||||
|
<requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
|
||||||
|
<requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
|
||||||
|
-->
|
||||||
|
<!-- If you wish to hide files under ${solr.home}/conf, explicitly
|
||||||
|
register the ShowFileRequestHandler using:
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<requestHandler name="/admin/file"
|
||||||
|
class="solr.admin.ShowFileRequestHandler" >
|
||||||
|
<lst name="invariants">
|
||||||
|
<str name="hidden">synonyms.txt</str>
|
||||||
|
<str name="hidden">anotherfile.txt</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- ping/healthcheck -->
|
||||||
|
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
|
||||||
|
<lst name="invariants">
|
||||||
|
<str name="q">solrpingquery</str>
|
||||||
|
</lst>
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">all</str>
|
||||||
|
</lst>
|
||||||
|
<!-- An optional feature of the PingRequestHandler is to configure the
|
||||||
|
handler with a "healthcheckFile" which can be used to enable/disable
|
||||||
|
the PingRequestHandler.
|
||||||
|
relative paths are resolved against the data dir
|
||||||
|
-->
|
||||||
|
<!-- <str name="healthcheckFile">server-enabled.txt</str> -->
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Echo the request contents back to the client -->
|
||||||
|
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
<str name="echoHandler">true</str>
|
||||||
|
</lst>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Solr Replication
|
||||||
|
|
||||||
|
The SolrReplicationHandler supports replicating indexes from a
|
||||||
|
"master" used for indexing and "slaves" used for queries.
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/SolrReplication
|
||||||
|
|
||||||
|
It is also necessary for SolrCloud to function (in Cloud mode, the
|
||||||
|
replication handler is used to bulk transfer segments when nodes
|
||||||
|
are added or need to recover).
|
||||||
|
|
||||||
|
https://wiki.apache.org/solr/SolrCloud/
|
||||||
|
-->
|
||||||
|
<requestHandler name="/replication" class="solr.ReplicationHandler" >
|
||||||
|
<!--
|
||||||
|
To enable simple master/slave replication, uncomment one of the
|
||||||
|
sections below, depending on whether this solr instance should be
|
||||||
|
the "master" or a "slave". If this instance is a "slave" you will
|
||||||
|
also need to fill in the masterUrl to point to a real machine.
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="master">
|
||||||
|
<str name="replicateAfter">commit</str>
|
||||||
|
<str name="replicateAfter">startup</str>
|
||||||
|
<str name="confFiles">schema.xml,stopwords.txt</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="slave">
|
||||||
|
<str name="masterUrl">http://your-master-hostname:8983/solr</str>
|
||||||
|
<str name="pollInterval">00:00:60</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Search Components
|
||||||
|
|
||||||
|
Search components are registered to SolrCore and used by
|
||||||
|
instances of SearchHandler (which can access them by name)
|
||||||
|
|
||||||
|
By default, the following components are available:
|
||||||
|
|
||||||
|
<searchComponent name="query" class="solr.QueryComponent" />
|
||||||
|
<searchComponent name="facet" class="solr.FacetComponent" />
|
||||||
|
<searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
|
||||||
|
<searchComponent name="highlight" class="solr.HighlightComponent" />
|
||||||
|
<searchComponent name="stats" class="solr.StatsComponent" />
|
||||||
|
<searchComponent name="debug" class="solr.DebugComponent" />
|
||||||
|
|
||||||
|
Default configuration in a requestHandler would look like:
|
||||||
|
|
||||||
|
<arr name="components">
|
||||||
|
<str>query</str>
|
||||||
|
<str>facet</str>
|
||||||
|
<str>mlt</str>
|
||||||
|
<str>highlight</str>
|
||||||
|
<str>stats</str>
|
||||||
|
<str>debug</str>
|
||||||
|
</arr>
|
||||||
|
|
||||||
|
If you register a searchComponent to one of the standard names,
|
||||||
|
that will be used instead of the default.
|
||||||
|
|
||||||
|
To insert components before or after the 'standard' components, use:
|
||||||
|
|
||||||
|
<arr name="first-components">
|
||||||
|
<str>myFirstComponentName</str>
|
||||||
|
</arr>
|
||||||
|
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>myLastComponentName</str>
|
||||||
|
</arr>
|
||||||
|
|
||||||
|
NOTE: The component registered with the name "debug" will
|
||||||
|
always be executed after the "last-components"
|
||||||
|
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Spell Check
|
||||||
|
|
||||||
|
The spell check component can return a list of alternative spelling
|
||||||
|
suggestions.
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/SpellCheckComponent
|
||||||
|
-->
|
||||||
|
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
||||||
|
|
||||||
|
<str name="queryAnalyzerFieldType">text_general</str>
|
||||||
|
|
||||||
|
<!-- Multiple "Spell Checkers" can be declared and used by this
|
||||||
|
component
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- a spellchecker built from a field of the main index -->
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">default</str>
|
||||||
|
<str name="field">text</str>
|
||||||
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
|
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||||
|
<str name="distanceMeasure">internal</str>
|
||||||
|
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
|
||||||
|
<float name="accuracy">0.5</float>
|
||||||
|
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
|
||||||
|
<int name="maxEdits">2</int>
|
||||||
|
<!-- the minimum shared prefix when enumerating terms -->
|
||||||
|
<int name="minPrefix">1</int>
|
||||||
|
<!-- maximum number of inspections per result. -->
|
||||||
|
<int name="maxInspections">5</int>
|
||||||
|
<!-- minimum length of a query term to be considered for correction -->
|
||||||
|
<int name="minQueryLength">4</int>
|
||||||
|
<!-- maximum threshold of documents a query term can appear in to be considered for correction -->
|
||||||
|
<float name="maxQueryFrequency">0.01</float>
|
||||||
|
<!-- uncomment this to require suggestions to occur in 1% of the documents
|
||||||
|
<float name="thresholdTokenFrequency">.01</float>
|
||||||
|
-->
|
||||||
|
</lst>
|
||||||
|
|
||||||
|
<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">wordbreak</str>
|
||||||
|
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
||||||
|
<str name="field">name</str>
|
||||||
|
<str name="combineWords">true</str>
|
||||||
|
<str name="breakWords">true</str>
|
||||||
|
<int name="maxChanges">10</int>
|
||||||
|
</lst>
|
||||||
|
|
||||||
|
<!-- a spellchecker that uses a different distance measure -->
|
||||||
|
<!--
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">jarowinkler</str>
|
||||||
|
<str name="field">spell</str>
|
||||||
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
|
<str name="distanceMeasure">
|
||||||
|
org.apache.lucene.search.spell.JaroWinklerDistance
|
||||||
|
</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- a spellchecker that uses an alternate comparator
|
||||||
|
|
||||||
|
comparatorClass can be one of:
|
||||||
|
1. score (default)
|
||||||
|
2. freq (Frequency first, then score)
|
||||||
|
3. A fully qualified class name
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">freq</str>
|
||||||
|
<str name="field">lowerfilt</str>
|
||||||
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
|
<str name="comparatorClass">freq</str>
</lst>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- A spellchecker that reads the list of words from a file -->
|
||||||
|
<!--
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="classname">solr.FileBasedSpellChecker</str>
|
||||||
|
<str name="name">file</str>
|
||||||
|
<str name="sourceLocation">spellings.txt</str>
|
||||||
|
<str name="characterEncoding">UTF-8</str>
|
||||||
|
<str name="spellcheckIndexDir">spellcheckerFile</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- A request handler for demonstrating the spellcheck component.
|
||||||
|
|
||||||
|
NOTE: This is purely as an example. The whole purpose of the
|
||||||
|
SpellCheckComponent is to hook it into the request handler that
|
||||||
|
handles your normal user queries so that a separate request is
|
||||||
|
not needed to get suggestions.
|
||||||
|
|
||||||
|
IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
|
||||||
|
NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
|
||||||
|
|
||||||
|
See http://wiki.apache.org/solr/SpellCheckComponent for details
|
||||||
|
on the request parameters.
|
||||||
|
-->
|
||||||
|
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="df">_text</str>
|
||||||
|
<!-- Solr will use suggestions from both the 'default' spellchecker
|
||||||
|
and from the 'wordbreak' spellchecker and combine them.
|
||||||
|
collations (re-written queries) can include a combination of
|
||||||
|
corrections from both spellcheckers -->
|
||||||
|
<str name="spellcheck.dictionary">default</str>
|
||||||
|
<str name="spellcheck.dictionary">wordbreak</str>
|
||||||
|
<str name="spellcheck">on</str>
|
||||||
|
<str name="spellcheck.extendedResults">true</str>
|
||||||
|
<str name="spellcheck.count">10</str>
|
||||||
|
<str name="spellcheck.alternativeTermCount">5</str>
|
||||||
|
<str name="spellcheck.maxResultsForSuggest">5</str>
|
||||||
|
<str name="spellcheck.collate">true</str>
|
||||||
|
<str name="spellcheck.collateExtendedResults">true</str>
|
||||||
|
<str name="spellcheck.maxCollationTries">10</str>
|
||||||
|
<str name="spellcheck.maxCollations">5</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>spellcheck</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Term Vector Component
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/TermVectorComponent
|
||||||
|
-->
|
||||||
|
<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
|
||||||
|
|
||||||
|
<!-- A request handler for demonstrating the term vector component
|
||||||
|
|
||||||
|
This is purely as an example.
|
||||||
|
|
||||||
|
In reality you will likely want to add the component to your
|
||||||
|
already specified request handlers.
|
||||||
|
-->
|
||||||
|
<requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="df">_text</str>
|
||||||
|
<bool name="tv">true</bool>
|
||||||
|
</lst>
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>tvComponent</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Clustering Component
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/ClusteringComponent
|
||||||
|
|
||||||
|
You'll need to set the solr.clustering.enabled system property
|
||||||
|
when running solr to run with clustering enabled:
|
||||||
|
|
||||||
|
java -Dsolr.clustering.enabled=true -jar start.jar
|
||||||
|
|
||||||
|
-->
|
||||||
|
<searchComponent name="clustering"
|
||||||
|
enable="${solr.clustering.enabled:false}"
|
||||||
|
class="solr.clustering.ClusteringComponent" >
|
||||||
|
<!-- Declare an engine -->
|
||||||
|
<lst name="engine">
|
||||||
|
<!-- The name, only one can be named "default" -->
|
||||||
|
<str name="name">default</str>
|
||||||
|
|
||||||
|
<!-- Class name of Carrot2 clustering algorithm.
|
||||||
|
|
||||||
|
Currently available algorithms are:
|
||||||
|
|
||||||
|
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
|
||||||
|
* org.carrot2.clustering.stc.STCClusteringAlgorithm
|
||||||
|
* org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
|
||||||
|
|
||||||
|
See http://project.carrot2.org/algorithms.html for the
|
||||||
|
algorithm's characteristics.
|
||||||
|
-->
|
||||||
|
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
|
||||||
|
|
||||||
|
<!-- Overriding values for Carrot2 default algorithm attributes.
|
||||||
|
|
||||||
|
For a description of all available attributes, see:
|
||||||
|
http://download.carrot2.org/stable/manual/#chapter.components.
|
||||||
|
Use attribute key as name attribute of str elements
|
||||||
|
below. These can be further overridden for individual
|
||||||
|
requests by specifying attribute key as request parameter
|
||||||
|
name and attribute value as parameter value.
|
||||||
|
-->
|
||||||
|
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
|
||||||
|
|
||||||
|
<!-- Location of Carrot2 lexical resources.
|
||||||
|
|
||||||
|
A directory from which to load Carrot2-specific stop words
|
||||||
|
and stop labels. Absolute or relative to Solr config directory.
|
||||||
|
If a specific resource (e.g. stopwords.en) is present in the
|
||||||
|
specified dir, it will completely override the corresponding
|
||||||
|
default one that ships with Carrot2.
|
||||||
|
|
||||||
|
For an overview of Carrot2 lexical resources, see:
|
||||||
|
http://download.carrot2.org/head/manual/#chapter.lexical-resources
|
||||||
|
-->
|
||||||
|
<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
|
||||||
|
|
||||||
|
<!-- The language to assume for the documents.
|
||||||
|
|
||||||
|
For a list of allowed values, see:
|
||||||
|
http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
|
||||||
|
-->
|
||||||
|
<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
|
||||||
|
</lst>
|
||||||
|
<lst name="engine">
|
||||||
|
<str name="name">stc</str>
|
||||||
|
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
|
||||||
|
</lst>
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- A request handler for demonstrating the clustering component
|
||||||
|
|
||||||
|
This is purely as an example.
|
||||||
|
|
||||||
|
In reality you will likely want to add the component to your
|
||||||
|
already specified request handlers.
|
||||||
|
-->
|
||||||
|
<requestHandler name="/clustering"
|
||||||
|
startup="lazy"
|
||||||
|
enable="${solr.clustering.enabled:false}"
|
||||||
|
class="solr.SearchHandler">
|
||||||
|
<lst name="defaults">
|
||||||
|
<bool name="clustering">true</bool>
|
||||||
|
<str name="clustering.engine">default</str>
|
||||||
|
<bool name="clustering.results">true</bool>
|
||||||
|
<!-- The title field -->
|
||||||
|
<str name="carrot.title">name</str>
|
||||||
|
<str name="carrot.url">id</str>
|
||||||
|
<!-- The field to cluster on -->
|
||||||
|
<str name="carrot.snippet">features</str>
|
||||||
|
<!-- produce summaries -->
|
||||||
|
<bool name="carrot.produceSummary">true</bool>
|
||||||
|
<!-- the maximum number of labels per cluster -->
|
||||||
|
<!--<int name="carrot.numDescriptions">5</int>-->
|
||||||
|
<!-- produce sub clusters -->
|
||||||
|
<bool name="carrot.outputSubClusters">false</bool>
|
||||||
|
|
||||||
|
<str name="defType">edismax</str>
|
||||||
|
<str name="qf">
|
||||||
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
|
</str>
|
||||||
|
<str name="q.alt">*:*</str>
|
||||||
|
<str name="rows">10</str>
|
||||||
|
<str name="fl">*,score</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>clustering</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Terms Component
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/TermsComponent
|
||||||
|
|
||||||
|
A component to return terms and document frequency of those
|
||||||
|
terms
|
||||||
|
-->
|
||||||
|
<searchComponent name="terms" class="solr.TermsComponent"/>
|
||||||
|
|
||||||
|
<!-- A request handler for demonstrating the terms component -->
|
||||||
|
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
|
||||||
|
<lst name="defaults">
|
||||||
|
<bool name="terms">true</bool>
|
||||||
|
<bool name="distrib">false</bool>
|
||||||
|
</lst>
|
||||||
|
<arr name="components">
|
||||||
|
<str>terms</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Query Elevation Component
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/QueryElevationComponent
|
||||||
|
|
||||||
|
a search component that enables you to configure the top
|
||||||
|
results for a given query regardless of the normal lucene
|
||||||
|
scoring.
|
||||||
|
-->
|
||||||
|
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
|
||||||
|
<!-- pick a fieldType to analyze queries -->
|
||||||
|
<str name="queryFieldType">string</str>
|
||||||
|
<str name="config-file">elevate.xml</str>
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- A request handler for demonstrating the elevator component -->
|
||||||
|
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="echoParams">explicit</str>
|
||||||
|
<str name="df">text</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>elevator</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
|
||||||
|
<!-- Highlighting Component
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/HighlightingParameters
|
||||||
|
-->
|
||||||
|
<searchComponent class="solr.HighlightComponent" name="highlight">
|
||||||
|
<highlighting>
|
||||||
|
<!-- Configure the standard fragmenter -->
|
||||||
|
<!-- This could most likely be commented out in the "default" case -->
|
||||||
|
<fragmenter name="gap"
|
||||||
|
default="true"
|
||||||
|
class="solr.highlight.GapFragmenter">
|
||||||
|
<lst name="defaults">
|
||||||
|
<int name="hl.fragsize">100</int>
|
||||||
|
</lst>
|
||||||
|
</fragmenter>
|
||||||
|
|
||||||
|
<!-- A regular-expression-based fragmenter
|
||||||
|
(for sentence extraction)
|
||||||
|
-->
|
||||||
|
<fragmenter name="regex"
|
||||||
|
class="solr.highlight.RegexFragmenter">
|
||||||
|
<lst name="defaults">
|
||||||
|
<!-- slightly smaller fragsizes work better because of slop -->
|
||||||
|
<int name="hl.fragsize">70</int>
|
||||||
|
<!-- allow 50% slop on fragment sizes -->
|
||||||
|
<float name="hl.regex.slop">0.5</float>
|
||||||
|
<!-- a basic sentence pattern -->
|
||||||
|
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
|
||||||
|
</lst>
|
||||||
|
</fragmenter>
|
||||||
|
|
||||||
|
<!-- Configure the standard formatter -->
|
||||||
|
<formatter name="html"
|
||||||
|
default="true"
|
||||||
|
class="solr.highlight.HtmlFormatter">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
|
||||||
|
<str name="hl.simple.post"><![CDATA[</em>]]></str>
|
||||||
|
</lst>
|
||||||
|
</formatter>
|
||||||
|
|
||||||
|
<!-- Configure the standard encoder -->
|
||||||
|
<encoder name="html"
|
||||||
|
class="solr.highlight.HtmlEncoder" />
|
||||||
|
|
||||||
|
<!-- Configure the standard fragListBuilder -->
|
||||||
|
<fragListBuilder name="simple"
|
||||||
|
class="solr.highlight.SimpleFragListBuilder"/>
|
||||||
|
|
||||||
|
<!-- Configure the single fragListBuilder -->
|
||||||
|
<fragListBuilder name="single"
|
||||||
|
class="solr.highlight.SingleFragListBuilder"/>
|
||||||
|
|
||||||
|
<!-- Configure the weighted fragListBuilder -->
|
||||||
|
<fragListBuilder name="weighted"
|
||||||
|
default="true"
|
||||||
|
class="solr.highlight.WeightedFragListBuilder"/>
|
||||||
|
|
||||||
|
<!-- default tag FragmentsBuilder -->
|
||||||
|
<fragmentsBuilder name="default"
|
||||||
|
default="true"
|
||||||
|
class="solr.highlight.ScoreOrderFragmentsBuilder">
|
||||||
|
<!--
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="hl.multiValuedSeparatorChar">/</str>
|
||||||
|
</lst>
|
||||||
|
-->
|
||||||
|
</fragmentsBuilder>
|
||||||
|
|
||||||
|
<!-- multi-colored tag FragmentsBuilder -->
|
||||||
|
<fragmentsBuilder name="colored"
|
||||||
|
class="solr.highlight.ScoreOrderFragmentsBuilder">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="hl.tag.pre"><![CDATA[
|
||||||
|
<b style="background:yellow">,<b style="background:lawgreen">,
|
||||||
|
<b style="background:aquamarine">,<b style="background:magenta">,
|
||||||
|
<b style="background:palegreen">,<b style="background:coral">,
|
||||||
|
<b style="background:wheat">,<b style="background:khaki">,
|
||||||
|
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
|
||||||
|
<str name="hl.tag.post"><![CDATA[</b>]]></str>
|
||||||
|
</lst>
|
||||||
|
</fragmentsBuilder>
|
||||||
|
|
||||||
|
<boundaryScanner name="default"
|
||||||
|
default="true"
|
||||||
|
class="solr.highlight.SimpleBoundaryScanner">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="hl.bs.maxScan">10</str>
|
||||||
|
<str name="hl.bs.chars">.,!? 	 </str>
|
||||||
|
</lst>
|
||||||
|
</boundaryScanner>
|
||||||
|
|
||||||
|
<boundaryScanner name="breakIterator"
|
||||||
|
class="solr.highlight.BreakIteratorBoundaryScanner">
|
||||||
|
<lst name="defaults">
|
||||||
|
<!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
|
||||||
|
<str name="hl.bs.type">WORD</str>
|
||||||
|
<!-- language and country are used when constructing Locale object. -->
|
||||||
|
<!-- And the Locale object will be used when getting an instance of BreakIterator -->
|
||||||
|
<str name="hl.bs.language">en</str>
|
||||||
|
<str name="hl.bs.country">US</str>
|
||||||
|
</lst>
|
||||||
|
</boundaryScanner>
|
||||||
|
</highlighting>
|
||||||
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- Update Processors
|
||||||
|
|
||||||
|
Chains of Update Processor Factories for dealing with Update
|
||||||
|
Requests can be declared, and then used by name in Update
|
||||||
|
Request Processors
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/UpdateRequestProcessor
|
||||||
|
|
||||||
|
-->
|
||||||
|
<!-- Deduplication
|
||||||
|
|
||||||
|
An example dedup update processor that creates the "id" field
|
||||||
|
on the fly based on the hash code of some other fields. This
|
||||||
|
example has overwriteDupes set to false since we are using the
|
||||||
|
id field as the signatureField and Solr will maintain
|
||||||
|
uniqueness based on that anyway.
|
||||||
|
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<updateRequestProcessorChain name="dedupe">
|
||||||
|
<processor class="solr.processor.SignatureUpdateProcessorFactory">
|
||||||
|
<bool name="enabled">true</bool>
|
||||||
|
<str name="signatureField">id</str>
|
||||||
|
<bool name="overwriteDupes">false</bool>
|
||||||
|
<str name="fields">name,features,cat</str>
|
||||||
|
<str name="signatureClass">solr.processor.Lookup3Signature</str>
|
||||||
|
</processor>
|
||||||
|
<processor class="solr.LogUpdateProcessorFactory" />
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Language identification
|
||||||
|
|
||||||
|
This example update chain identifies the language of the incoming
|
||||||
|
documents using the langid contrib. The detected language is
|
||||||
|
written to field language_s. No field name mapping is done.
|
||||||
|
The fields used for detection are text, title, subject and description,
|
||||||
|
making this example suitable for detecting languages from full-text
|
||||||
|
rich documents injected via ExtractingRequestHandler.
|
||||||
|
See more about langId at http://wiki.apache.org/solr/LanguageDetection
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<updateRequestProcessorChain name="langid">
|
||||||
|
<processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
|
||||||
|
<str name="langid.fl">text,title,subject,description</str>
|
||||||
|
<str name="langid.langField">language_s</str>
|
||||||
|
<str name="langid.fallback">en</str>
|
||||||
|
</processor>
|
||||||
|
<processor class="solr.LogUpdateProcessorFactory" />
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Script update processor
|
||||||
|
|
||||||
|
This example hooks in an update processor implemented using JavaScript.
|
||||||
|
|
||||||
|
See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<updateRequestProcessorChain name="script">
|
||||||
|
<processor class="solr.StatelessScriptUpdateProcessorFactory">
|
||||||
|
<str name="script">update-script.js</str>
|
||||||
|
<lst name="params">
|
||||||
|
<str name="config_param">example config parameter</str>
|
||||||
|
</lst>
|
||||||
|
</processor>
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Response Writers
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/QueryResponseWriter
|
||||||
|
|
||||||
|
Request responses will be written using the writer specified by
|
||||||
|
the 'wt' request parameter matching the name of a registered
|
||||||
|
writer.
|
||||||
|
|
||||||
|
The "default" writer is the default and will be used if 'wt' is
|
||||||
|
not specified in the request.
|
||||||
|
-->
|
||||||
|
<!-- The following response writers are implicitly configured unless
|
||||||
|
overridden...
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<queryResponseWriter name="xml"
|
||||||
|
default="true"
|
||||||
|
class="solr.XMLResponseWriter" />
|
||||||
|
<queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
|
||||||
|
<queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
|
||||||
|
<queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
|
||||||
|
<queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
|
||||||
|
<queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
|
||||||
|
<queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
|
||||||
|
<queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
|
||||||
|
<!-- For the purposes of the tutorial, JSON responses are written as
|
||||||
|
plain text so that they are easy to read in *any* browser.
|
||||||
|
If you expect a MIME type of "application/json" just remove this override.
|
||||||
|
-->
|
||||||
|
<str name="content-type">text/plain; charset=UTF-8</str>
|
||||||
|
</queryResponseWriter>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Custom response writers can be declared as needed...
|
||||||
|
-->
|
||||||
|
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- XSLT response writer transforms the XML output by any xslt file found
|
||||||
|
in Solr's conf/xslt directory. Changes to xslt files are checked for
|
||||||
|
every xsltCacheLifetimeSeconds.
|
||||||
|
-->
|
||||||
|
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
|
||||||
|
<int name="xsltCacheLifetimeSeconds">5</int>
|
||||||
|
</queryResponseWriter>
|
||||||
|
|
||||||
|
<!-- Query Parsers
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/SolrQuerySyntax
|
||||||
|
|
||||||
|
Multiple QParserPlugins can be registered by name, and then
|
||||||
|
used in either the "defType" param for the QueryComponent (used
|
||||||
|
by SearchHandler) or in LocalParams
|
||||||
|
-->
|
||||||
|
<!-- example of registering a query parser -->
|
||||||
|
<!--
|
||||||
|
<queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Function Parsers
|
||||||
|
|
||||||
|
http://wiki.apache.org/solr/FunctionQuery
|
||||||
|
|
||||||
|
Multiple ValueSourceParsers can be registered by name, and then
|
||||||
|
used as function names when using the "func" QParser.
|
||||||
|
-->
|
||||||
|
<!-- example of registering a custom function parser -->
|
||||||
|
<!--
|
||||||
|
<valueSourceParser name="myfunc"
|
||||||
|
class="com.mycompany.MyValueSourceParser" />
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Document Transformers
|
||||||
|
http://wiki.apache.org/solr/DocTransformers
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
Could be something like:
|
||||||
|
<transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
|
||||||
|
<int name="connection">jdbc://....</int>
|
||||||
|
</transformer>
|
||||||
|
|
||||||
|
To add a constant value to all docs, use:
|
||||||
|
<transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
|
||||||
|
<int name="value">5</int>
|
||||||
|
</transformer>
|
||||||
|
|
||||||
|
If you want the user to still be able to change it with _value:something_ use this:
|
||||||
|
<transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
|
||||||
|
<double name="defaultValue">5</double>
|
||||||
|
</transformer>
|
||||||
|
|
||||||
|
If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
|
||||||
|
EditorialMarkerFactory will do exactly that:
|
||||||
|
<transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
|
||||||
|
-->
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Legacy config for the admin interface -->
|
||||||
|
<admin>
|
||||||
|
<defaultQuery>*:*</defaultQuery>
|
||||||
|
</admin>
|
||||||
|
|
||||||
|
</config>
|
|
@ -0,0 +1,58 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
# a couple of test stopwords to test that the words are really being
|
||||||
|
# configured from this file:
|
||||||
|
stopworda
|
||||||
|
stopwordb
|
||||||
|
|
||||||
|
#Standard English stop words taken from Lucene's StopAnalyzer
|
||||||
|
a
|
||||||
|
an
|
||||||
|
and
|
||||||
|
are
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
but
|
||||||
|
by
|
||||||
|
for
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
it
|
||||||
|
no
|
||||||
|
not
|
||||||
|
of
|
||||||
|
on
|
||||||
|
or
|
||||||
|
s
|
||||||
|
such
|
||||||
|
t
|
||||||
|
that
|
||||||
|
the
|
||||||
|
their
|
||||||
|
then
|
||||||
|
there
|
||||||
|
these
|
||||||
|
they
|
||||||
|
this
|
||||||
|
to
|
||||||
|
was
|
||||||
|
will
|
||||||
|
with
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------
|
||||||
|
#some test synonym mappings unlikely to appear in real input text
|
||||||
|
aaafoo => aaabar
|
||||||
|
bbbfoo => bbbfoo bbbbar
|
||||||
|
cccfoo => cccbar cccbaz
|
||||||
|
fooaaa,baraaa,bazaaa
|
||||||
|
|
||||||
|
# Some synonym groups specific to this example
|
||||||
|
GB,gib,gigabyte,gigabytes
|
||||||
|
MB,mib,megabyte,megabytes
|
||||||
|
Television, Televisions, TV, TVs
|
||||||
|
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||||
|
#after us won't split it into two words.
|
||||||
|
|
||||||
|
# Synonym mappings can be used for spelling correction too
|
||||||
|
pixima => pixma
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This is the Solr schema file. This file should be named "schema.xml" and
|
||||||
|
should be in the conf directory under the solr home
|
||||||
|
(i.e. ./solr/conf/schema.xml by default)
|
||||||
|
or located where the classloader for the Solr webapp can find it.
|
||||||
|
|
||||||
|
This example schema is the recommended starting point for users.
|
||||||
|
It should be kept correct and concise, usable out-of-the-box.
|
||||||
|
|
||||||
|
For more information on how to customize this file, please see
|
||||||
|
http://wiki.apache.org/solr/SchemaXml
|
||||||
|
|
||||||
|
PERFORMANCE NOTE: this schema includes many optional features and should not
|
||||||
|
be used for benchmarking. To improve performance one could
|
||||||
|
- set stored="false" for all fields possible (esp large fields) when you
|
||||||
|
only need to search on the field but don't need to return the original
|
||||||
|
value.
|
||||||
|
- set indexed="false" if you don't need to search on the field, but only
|
||||||
|
return the field as a result of searching on other indexed fields.
|
||||||
|
- remove all unneeded copyField statements
|
||||||
|
- for best index size and searching performance, set indexed="false"
|
||||||
|
for all general text fields, use copyField to copy them to the
|
||||||
|
catchall "text" field, and use that for searching.
|
||||||
|
- For maximum indexing performance, use the StreamingUpdateSolrServer
|
||||||
|
java client.
|
||||||
|
- Remember to run the JVM in server mode, and use a higher logging level
|
||||||
|
that avoids logging every request
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="$IndexName" version="1.5">
|
||||||
|
|
||||||
|
<types>
|
||||||
|
$Types
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
$FieldDefinitions
|
||||||
|
|
||||||
|
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
$CopyFieldDefinitions
|
||||||
|
|
||||||
|
<uniqueKey>_documentid</uniqueKey>
|
||||||
|
|
||||||
|
<defaultSearchField>_text</defaultSearchField>
|
||||||
|
|
||||||
|
<solrQueryParser defaultOperator="OR"/>
|
||||||
|
|
||||||
|
</schema>
|
|
@ -0,0 +1,360 @@
|
||||||
|
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
||||||
|
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
<!-- boolean type: "true" or "false" -->
|
||||||
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||||
|
<!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
||||||
|
<fieldtype name="binary" class="solr.BinaryField"/>
|
||||||
|
|
||||||
|
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
||||||
|
currently supported on types that are sorted internally as strings.
|
||||||
|
This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
||||||
|
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||||
|
without the field to come after documents with the field,
|
||||||
|
regardless of the requested sort order (asc or desc).
|
||||||
|
- If sortMissingFirst="true", then a sort on this field will cause documents
|
||||||
|
without the field to come before documents with the field,
|
||||||
|
regardless of the requested sort order.
|
||||||
|
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||||
|
then default lucene sorting will be used which places docs without the
|
||||||
|
field first in an ascending sort and last in a descending sort.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
||||||
|
-->
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Numeric field types that index each value at various levels of precision
|
||||||
|
to accelerate range queries when the number of values between the range
|
||||||
|
endpoints is large. See the javadoc for NumericRangeQuery for internal
|
||||||
|
implementation details.
|
||||||
|
|
||||||
|
Smaller precisionStep values (specified in bits) will lead to more tokens
|
||||||
|
indexed per value, slightly larger index size, and faster range queries.
|
||||||
|
A precisionStep of 0 disables indexing at different precision levels.
|
||||||
|
-->
|
||||||
|
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||||
|
is a more restricted form of the canonical representation of dateTime
|
||||||
|
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||||
|
The trailing "Z" designates UTC time and is mandatory.
|
||||||
|
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||||
|
All other components are mandatory.
|
||||||
|
|
||||||
|
Expressions can also be used to denote calculations that should be
|
||||||
|
performed relative to "NOW" to determine the value, ie...
|
||||||
|
|
||||||
|
NOW/HOUR
|
||||||
|
... Round to the start of the current hour
|
||||||
|
NOW-1DAY
|
||||||
|
... Exactly 1 day prior to now
|
||||||
|
NOW/DAY+6MONTHS+3DAYS
|
||||||
|
... 6 months and 3 days in the future from the start of
|
||||||
|
the current day
|
||||||
|
|
||||||
|
Consult the DateField javadocs for more information.
|
||||||
|
|
||||||
|
Note: For faster range queries, consider the tdate type
|
||||||
|
-->
|
||||||
|
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
||||||
|
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- The "RandomSortField" is not used to store or search any
|
||||||
|
data. You can declare fields of this type in your schema
|
||||||
|
to generate pseudo-random orderings of your docs for sorting
|
||||||
|
purposes. The ordering is generated based on the field name
|
||||||
|
and the version of the index. As long as the index version
|
||||||
|
remains unchanged, and the same field name is reused,
|
||||||
|
the ordering of the docs will be consistent.
|
||||||
|
If you want different pseudo-random orderings of documents,
|
||||||
|
for the same version of the index, use a dynamicField and
|
||||||
|
change the name
|
||||||
|
-->
|
||||||
|
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||||
|
|
||||||
|
<!-- solr.TextField allows the specification of custom text analyzers
|
||||||
|
specified as a tokenizer and a list of token filters. Different
|
||||||
|
analyzers may be specified for indexing and querying.
|
||||||
|
|
||||||
|
The optional positionIncrementGap puts space between multiple fields of
|
||||||
|
this type on the same document, with the purpose of preventing false phrase
|
||||||
|
matching across fields.
|
||||||
|
|
||||||
|
For more info on customizing your analyzer chain, please see
|
||||||
|
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- One can also specify an existing Analyzer class that has a
|
||||||
|
default constructor via the class attribute on the analyzer element
|
||||||
|
<fieldType name="text_greek" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||||
|
</fieldType>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||||
|
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||||
|
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||||
|
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||||
|
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||||
|
The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
|
||||||
|
form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
|
||||||
|
to generate text:"pdp 11" rather than (text:PDP OR text:11).
|
||||||
|
NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
|
||||||
|
-->
|
||||||
|
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<!-- in this example, we will only use synonyms at query time
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
-->
|
||||||
|
<!-- Case insensitive stop word removal.
|
||||||
|
add enablePositionIncrements=true in both the index and query
|
||||||
|
analyzers to leave a 'gap' for more accurate phrase queries.
|
||||||
|
-->
|
||||||
|
<filter class="solr.StopFilterFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
words="stopwords.txt"
|
||||||
|
enablePositionIncrements="true"
|
||||||
|
/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
words="stopwords.txt"
|
||||||
|
enablePositionIncrements="true"
|
||||||
|
/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- A copy of text that has the HTMLStripCharFilterFactory as the first index analyzer, so that html can be provided -->
|
||||||
|
<fieldType name="htmltext" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||||
|
<analyzer type="index">
|
||||||
|
<charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
||||||
|
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||||
|
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||||
|
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||||
|
possible with WordDelimiterFilter in conjunction with stemming. -->
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- Text optimized for spelling corrections, with minimal alterations (e.g. no stemming) -->
|
||||||
|
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.LengthFilterFactory" min="4" max="20" />
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
||||||
|
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
words="stopwords.txt"
|
||||||
|
enablePositionIncrements="true"
|
||||||
|
/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- A general unstemmed text field that indexes tokens normally and also
|
||||||
|
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
||||||
|
leading wildcard queries. -->
|
||||||
|
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||||
|
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
words="stopwords.txt"
|
||||||
|
enablePositionIncrements="true"
|
||||||
|
/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- charFilter + WhitespaceTokenizer -->
|
||||||
|
<!--
|
||||||
|
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
|
||||||
|
<analyzer>
|
||||||
|
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- This is an example of using the KeywordTokenizer along
|
||||||
|
with various TokenFilterFactories to produce a sortable field
|
||||||
|
that does not include some properties of the source text
|
||||||
|
-->
|
||||||
|
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
||||||
|
<analyzer>
|
||||||
|
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||||
|
input string is preserved as a single token
|
||||||
|
-->
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
<!-- The LowerCase TokenFilter does what you expect, which can be
|
||||||
|
useful when you want your sorting to be case insensitive
|
||||||
|
-->
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||||
|
<filter class="solr.TrimFilterFactory" />
|
||||||
|
<!-- The PatternReplaceFilter gives you the flexibility to use
|
||||||
|
Java Regular expression to replace any sequence of characters
|
||||||
|
matching a pattern with an arbitrary replacement string,
|
||||||
|
which may include back references to portions of the original
|
||||||
|
string matched by the pattern.
|
||||||
|
|
||||||
|
See the Java Regular Expression documentation for more
|
||||||
|
information on pattern and replacement string syntax.
|
||||||
|
|
||||||
|
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
||||||
|
-->
|
||||||
|
<filter class="solr.PatternReplaceFilterFactory"
|
||||||
|
pattern="([^a-z])" replacement="" replace="all"
|
||||||
|
/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||||
|
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
|
<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<!--
|
||||||
|
The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
||||||
|
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
||||||
|
Attributes of the DelimitedPayloadTokenFilterFactory :
|
||||||
|
"delimiter" - a one character delimiter. Default is | (pipe)
|
||||||
|
"encoder" - how to encode the following value into a payload
|
||||||
|
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
||||||
|
integer -> o.a.l.a.p.IntegerEncoder
|
||||||
|
identity -> o.a.l.a.p.IdentityEncoder
|
||||||
|
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
||||||
|
-->
|
||||||
|
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
|
<!-- lowercases the entire field value, keeping it as a single token. -->
|
||||||
|
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- since fields of this type are by default not stored or indexed,
|
||||||
|
any data added to them will be ignored outright. -->
|
||||||
|
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
||||||
|
|
||||||
|
<!-- This point type indexes the coordinates as separate fields (subFields)
|
||||||
|
If subFieldType is defined, it references a type, and a dynamic field
|
||||||
|
definition is created matching *___<typename>. Alternately, if
|
||||||
|
subFieldSuffix is defined, that is used to create the subFields.
|
||||||
|
Example: if subFieldType="double", then the coordinates would be
|
||||||
|
indexed in fields myloc_0___double,myloc_1___double.
|
||||||
|
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
||||||
|
in fields myloc_0_d,myloc_1_d
|
||||||
|
The subFields are an implementation detail of the fieldType, and end
|
||||||
|
users normally should not need to know about them.
|
||||||
|
-->
|
||||||
|
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||||
|
|
||||||
|
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||||
|
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
||||||
|
See http://wiki.apache.org/solr/SpatialSearch
|
||||||
|
-->
|
||||||
|
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
|
@ -103,11 +103,11 @@ class SolrIndexTest extends SapphireTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function getServiceMock() {
|
protected function getServiceMock() {
|
||||||
return Phockito::mock('SolrService');
|
return Phockito::mock('Solr3Service');
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function getServiceSpy() {
|
protected function getServiceSpy() {
|
||||||
$serviceSpy = Phockito::spy('SolrService');
|
$serviceSpy = Phockito::spy('Solr3Service');
|
||||||
Phockito::when($serviceSpy)->_sendRawPost()->return($this->getFakeRawSolrResponse());
|
Phockito::when($serviceSpy)->_sendRawPost()->return($this->getFakeRawSolrResponse());
|
||||||
|
|
||||||
return $serviceSpy;
|
return $serviceSpy;
|
||||||
|
|
Loading…
Reference in New Issue