mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 14:05:29 +02:00
Merge pull request #35 from hafriedlander/solr4
API Add support for Solr 4 and make it the default
This commit is contained in:
commit
efdc96e937
@ -11,9 +11,13 @@ class Solr {
|
||||
* path (default: /solr) - The suburl the solr service is available on
|
||||
*
|
||||
* Optional fields:
|
||||
* extraspath (default: <basefolder>/fulltextsearch/conf/extras/) - Absolute path to
|
||||
* version (default: 4) - The Solr server version. Currently supports 3 and 4 (you can add a sub-version like 4.5 if
|
||||
* you like, but currently it has no effect)
|
||||
* service (default: depends on version, Solr3Service for 3, Solr4Service for 4)
|
||||
* the class that provides actual communication to the Solr server
|
||||
* extraspath (default: <basefolder>/fulltextsearch/conf/solr/{version}/extras/) - Absolute path to
|
||||
* the folder containing templates which are used for generating the schema and field definitions.
|
||||
* templates (default: <basefolder>/fulltextsearch/conf/templates/) - Absolute path to
|
||||
* templatespath (default: <basefolder>/fulltextsearch/conf/solr/{version}/templates/) - Absolute path to
|
||||
* the configuration default files, e.g. solrconfig.xml.
|
||||
*
|
||||
* indexstore => an array with
|
||||
@ -29,36 +33,87 @@ class Solr {
|
||||
* path (default: /solrindex) - The suburl on the solr host that is set up to accept index configurations via webdav
|
||||
* remotepath - The path that the Solr server will read the index configurations from
|
||||
*/
|
||||
static $solr_options = array();
|
||||
protected static $solr_options = array();
|
||||
|
||||
/** A cache of solr_options with the defaults all merged in */
|
||||
protected static $merged_solr_options = null;
|
||||
|
||||
/**
|
||||
* Update the configuration for Solr. See $solr_options for a discussion of the accepted array keys
|
||||
* @param array $options - The options to update
|
||||
*/
|
||||
static function configure_server($options = array()) {
|
||||
self::$solr_options = array_merge(array(
|
||||
self::$solr_options = array_merge(self::$solr_options, $options);
|
||||
self::$merged_solr_options = null;
|
||||
|
||||
self::$service_singleton = null;
|
||||
self::$service_core_singletons = array();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the configured Solr options with the defaults all merged in
|
||||
* @return array - The merged options
|
||||
*/
|
||||
static function solr_options() {
|
||||
if (self::$merged_solr_options) return self::$merged_solr_options;
|
||||
|
||||
$defaults = array(
|
||||
'host' => 'localhost',
|
||||
'port' => 8983,
|
||||
'path' => '/solr',
|
||||
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/extras/',
|
||||
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/templates/',
|
||||
), self::$solr_options, $options);
|
||||
}
|
||||
'version' => '4'
|
||||
);
|
||||
|
||||
static protected $service_class = 'SolrService';
|
||||
// Build some by-version defaults
|
||||
$version = isset(self::$solr_options['version']) ? self::$solr_options['version'] : $defaults['version'];
|
||||
|
||||
static function set_service_class($class) {
|
||||
self::$service_class = $class;
|
||||
self::$service = null;
|
||||
}
|
||||
|
||||
static protected $service = null;
|
||||
|
||||
static function service($core = null) {
|
||||
if (!self::$service) {
|
||||
if (!self::$solr_options) user_error('No configuration for Solr server provided', E_USER_ERROR);
|
||||
|
||||
$class = self::$service_class;
|
||||
self::$service = new $class(self::$solr_options['host'], self::$solr_options['port'], self::$solr_options['path']);
|
||||
if (version_compare($version, '4', '>=')){
|
||||
$versionDefaults = array(
|
||||
'service' => 'Solr4Service',
|
||||
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/solr/4/extras/',
|
||||
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/solr/4/templates/',
|
||||
);
|
||||
}
|
||||
else {
|
||||
$versionDefaults = array(
|
||||
'service' => 'Solr3Service',
|
||||
'extraspath' => Director::baseFolder().'/fulltextsearch/conf/solr/3/extras/',
|
||||
'templatespath' => Director::baseFolder().'/fulltextsearch/conf/solr/3/templates/',
|
||||
);
|
||||
}
|
||||
|
||||
return $core ? self::$service->serviceForCore($core) : self::$service;
|
||||
return (self::$merged_solr_options = array_merge($defaults, $versionDefaults, self::$solr_options));
|
||||
}
|
||||
|
||||
|
||||
static function set_service_class($class) {
|
||||
user_error('set_service_class is deprecated - pass as part of $options to configure_server', E_USER_WARNING);
|
||||
self::configure_server(array('service' => $class));
|
||||
}
|
||||
|
||||
/** @var SolrService | null - The instance of SolrService for core management */
|
||||
static protected $service_singleton = null;
|
||||
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
|
||||
static protected $service_core_singletons = array();
|
||||
|
||||
static function service($core = null) {
|
||||
$options = self::solr_options();
|
||||
|
||||
if (!self::$service_singleton) {
|
||||
self::$service_singleton = Object::create(
|
||||
$options['service'], $options['host'], $options['port'], $options['path']
|
||||
);
|
||||
}
|
||||
|
||||
if ($core) {
|
||||
if (!isset(self::$service_core_singletons[$core])) {
|
||||
self::$service_core_singletons[$core] = self::$service_singleton->serviceForCore($core);
|
||||
}
|
||||
|
||||
return self::$service_core_singletons[$core];
|
||||
} else {
|
||||
return self::$service_singleton;
|
||||
}
|
||||
}
|
||||
|
||||
static function get_indexes() {
|
||||
@ -66,8 +121,8 @@ class Solr {
|
||||
}
|
||||
|
||||
/**
|
||||
* Include the thirdparty Solr client api library. Done this way to avoid issues where code is called in mysite/_config
|
||||
* before fulltextsearch/_config has a chance to update the include path.
|
||||
* Include the thirdparty Solr client api library. Done this way to avoid issues where code is called in
|
||||
* mysite/_config before fulltextsearch/_config has a chance to update the include path.
|
||||
*/
|
||||
static function include_client_api() {
|
||||
static $included = false;
|
||||
@ -88,8 +143,9 @@ class Solr_Configure extends BuildTask {
|
||||
public function run($request) {
|
||||
$service = Solr::service();
|
||||
$indexes = Solr::get_indexes();
|
||||
$options = Solr::solr_options();
|
||||
|
||||
if (!isset(Solr::$solr_options['indexstore']) || !($indexstore = Solr::$solr_options['indexstore'])) {
|
||||
if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
|
||||
user_error('No index configuration for Solr provided', E_USER_ERROR);
|
||||
}
|
||||
|
||||
|
8
code/solr/Solr3Service.php
Normal file
8
code/solr/Solr3Service.php
Normal file
@ -0,0 +1,8 @@
|
||||
<?php
|
||||
|
||||
class Solr3Service_Core extends SolrService_Core {
|
||||
}
|
||||
|
||||
class Solr3Service extends SolrService {
|
||||
private static $core_class = 'Solr3Service_Core';
|
||||
}
|
25
code/solr/Solr4Service.php
Normal file
25
code/solr/Solr4Service.php
Normal file
@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
/**
 * Per-core service API for Solr 4.0+. Identical to SolrService_Core except that commit() never sends the
 * waitFlush attribute.
 */
class Solr4Service_Core extends SolrService_Core {

	/**
	 * Replace underlying commit function to remove waitFlush in 4.0+, since it's been deprecated and 4.4 throws
	 * errors if you pass it.
	 *
	 * @param boolean $expungeDeletes - Sent as the expungeDeletes attribute of the <commit /> message
	 * @param boolean|null $waitFlush - Must be left falsy; a truthy value raises an E_USER_ERROR
	 * @param boolean $waitSearcher - Sent as the waitSearcher attribute of the <commit /> message
	 * @param float $timeout - Passed straight through to _sendRawPost (presumably seconds - TODO confirm)
	 * @return mixed - Whatever _sendRawPost returns for the update request
	 */
	public function commit($expungeDeletes = false, $waitFlush = null, $waitSearcher = true, $timeout = 3600) {
		if ($waitFlush) {
			// The error level is the second argument of user_error. Concatenating it onto the message (as
			// was done before) appended "256" to the text and raised only a notice, so the commit went ahead.
			user_error('waitFlush must be false when using Solr 4.0+', E_USER_ERROR);
		}

		$expungeValue = $expungeDeletes ? 'true' : 'false';
		$searcherValue = $waitSearcher ? 'true' : 'false';

		// Build the commit message without any waitFlush attribute
		$rawPost = '<commit expungeDeletes="' . $expungeValue . '" waitSearcher="' . $searcherValue . '" />';
		return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
	}
}
|
||||
|
||||
class Solr4Service extends SolrService {
|
||||
private static $core_class = 'Solr4Service_Core';
|
||||
}
|
||||
|
@ -80,10 +80,12 @@ class SolrConfigStore_File implements SolrConfigStore {
|
||||
*/
|
||||
class SolrConfigStore_WebDAV implements SolrConfigStore {
|
||||
function __construct($config) {
|
||||
$options = Solr::solr_options();
|
||||
|
||||
$this->url = implode('', array(
|
||||
'http://',
|
||||
isset($config['auth']) ? $config['auth'].'@' : '',
|
||||
Solr::$solr_options['host'] . ':' . Solr::$solr_options['port'],
|
||||
$options['host'].':'.$options['port'],
|
||||
$config['path']
|
||||
));
|
||||
$this->remote = $config['remotepath'];
|
||||
|
@ -36,7 +36,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||
* templates which are used for generating the schema and field definitions.
|
||||
*/
|
||||
function getTemplatesPath() {
|
||||
return $this->templatesPath ? $this->templatesPath : Solr::$solr_options['templatespath'];
|
||||
$globalOptions = Solr::solr_options();
|
||||
return $this->templatesPath ? $this->templatesPath : $globalOptions['templatespath'];
|
||||
}
|
||||
|
||||
/**
|
||||
@ -44,7 +45,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||
* e.g. solrconfig.xml.
|
||||
*/
|
||||
function getExtrasPath() {
|
||||
return $this->extrasPath ? $this->extrasPath : Solr::$solr_options['extraspath'];
|
||||
$globalOptions = Solr::solr_options();
|
||||
return $this->extrasPath ? $this->extrasPath : $globalOptions['extraspath'];
|
||||
}
|
||||
|
||||
function generateSchema() {
|
||||
|
@ -2,10 +2,22 @@
|
||||
|
||||
Solr::include_client_api();
|
||||
|
||||
class SolrService extends Apache_Solr_Service {
|
||||
/**
|
||||
* The API for accessing a specific core of a Solr server. Exactly the same as Apache_Solr_Service for now.
|
||||
*/
|
||||
class SolrService_Core extends Apache_Solr_Service {
|
||||
}
|
||||
|
||||
/**
|
||||
* The API for accessing the primary Solr installation, which includes both SolrService_Core,
|
||||
* plus extra methods for interrogating, creating, reloading and getting SolrService_Core instances
|
||||
* for Solr cores.
|
||||
*/
|
||||
class SolrService extends SolrService_Core {
|
||||
private static $core_class = 'SolrService_Core';
|
||||
|
||||
/**
|
||||
* @return Apache_Solr_Response
|
||||
* Handle encoding the GET parameters and making the HTTP call to execute a core command
|
||||
*/
|
||||
protected function coreCommand($command, $core, $params=array()) {
|
||||
$command = strtoupper($command);
|
||||
@ -17,7 +29,9 @@ class SolrService extends Apache_Solr_Service {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return boolean
|
||||
* Is the passed core active?
|
||||
* @param $core string - The name of the core
|
||||
* @return boolean - True if that core exists & is active
|
||||
*/
|
||||
public function coreIsActive($core) {
|
||||
$result = $this->coreCommand('STATUS', $core);
|
||||
@ -25,6 +39,12 @@ class SolrService extends Apache_Solr_Service {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new core
|
||||
* @param $core string - The name of the core
|
||||
* @param $instancedir string - The base path of the core on the server
|
||||
* @param $config string - The filename of solrconfig.xml on the server. Default is $instancedir/solrconfig.xml
|
||||
* @param $schema string - The filename of schema.xml on the server. Default is $instancedir/schema.xml
|
||||
* @param $datadir string - The path to store data for this core on the server. Default depends on solrconfig.xml
|
||||
* @return Apache_Solr_Response
|
||||
*/
|
||||
public function coreCreate($core, $instancedir, $config=null, $schema=null, $datadir=null) {
|
||||
@ -37,19 +57,21 @@ class SolrService extends Apache_Solr_Service {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reload a core
|
||||
* @param $core string - The name of the core
|
||||
* @return Apache_Solr_Response
|
||||
*/
|
||||
public function coreReload($core) {
|
||||
return $this->coreCommand('RELOAD', $core);
|
||||
}
|
||||
|
||||
protected $_serviceCache = array();
|
||||
|
||||
/**
|
||||
* Create a new core service instance (of the class named by the core_class config) for the passed core
|
||||
* @param $core string - The name of the core
|
||||
* @return SolrService_Core
|
||||
*/
|
||||
public function serviceForCore($core) {
|
||||
if (!isset($this->_serviceCache[$core])) {
|
||||
$this->_serviceCache[$core] = new Apache_Solr_Service($this->_host, $this->_port, $this->_path."$core", $this->_httpTransport);
|
||||
}
|
||||
|
||||
return $this->_serviceCache[$core];
|
||||
$klass = Config::inst()->get(get_called_class(), 'core_class');
|
||||
return new $klass($this->_host, $this->_port, $this->_path.$core, $this->_httpTransport);
|
||||
}
|
||||
}
|
||||
|
36
conf/solr/4/extras/elevate.xml
Normal file
36
conf/solr/4/extras/elevate.xml
Normal file
@ -0,0 +1,36 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- If this file is found in the config directory, it will only be
|
||||
loaded once at startup. If it is found in Solr's data
|
||||
directory, it will be re-loaded every commit.
|
||||
-->
|
||||
|
||||
<elevate>
|
||||
<query text="foo bar">
|
||||
<doc id="1" />
|
||||
<doc id="2" />
|
||||
<doc id="3" />
|
||||
</query>
|
||||
|
||||
<query text="ipod">
|
||||
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
|
||||
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
|
||||
</query>
|
||||
|
||||
</elevate>
|
3813
conf/solr/4/extras/mapping-FoldToASCII.txt
Normal file
3813
conf/solr/4/extras/mapping-FoldToASCII.txt
Normal file
@ -0,0 +1,3813 @@
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# This map converts alphabetic, numeric, and symbolic Unicode characters
|
||||
# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
|
||||
# block) into their ASCII equivalents, if one exists.
|
||||
#
|
||||
# Characters from the following Unicode blocks are converted; however, only
|
||||
# those characters with reasonable ASCII alternatives are converted:
|
||||
#
|
||||
# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
|
||||
# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
|
||||
# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
|
||||
# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
|
||||
# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
|
||||
# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
|
||||
# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
|
||||
# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
|
||||
# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
|
||||
# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
|
||||
# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
|
||||
# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
|
||||
# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
|
||||
# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
|
||||
# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
|
||||
# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
|
||||
#
|
||||
# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
|
||||
#
|
||||
# The set of character conversions supported by this map is a superset of
|
||||
# those supported by the map represented by mapping-ISOLatin1Accent.txt.
|
||||
#
|
||||
# See the bottom of this file for the Perl script used to generate the contents
|
||||
# of this file (without this header) from ASCIIFoldingFilter.java.
|
||||
|
||||
|
||||
# Syntax:
|
||||
# "source" => "target"
|
||||
# "source".length() > 0 (source cannot be empty.)
|
||||
# "target".length() >= 0 (target can be empty.)
|
||||
|
||||
|
||||
# À [LATIN CAPITAL LETTER A WITH GRAVE]
|
||||
"\u00C0" => "A"
|
||||
|
||||
# Á [LATIN CAPITAL LETTER A WITH ACUTE]
|
||||
"\u00C1" => "A"
|
||||
|
||||
# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
|
||||
"\u00C2" => "A"
|
||||
|
||||
# Ã [LATIN CAPITAL LETTER A WITH TILDE]
|
||||
"\u00C3" => "A"
|
||||
|
||||
# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
|
||||
"\u00C4" => "A"
|
||||
|
||||
# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
|
||||
"\u00C5" => "A"
|
||||
|
||||
# Ā [LATIN CAPITAL LETTER A WITH MACRON]
|
||||
"\u0100" => "A"
|
||||
|
||||
# Ă [LATIN CAPITAL LETTER A WITH BREVE]
|
||||
"\u0102" => "A"
|
||||
|
||||
# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
|
||||
"\u0104" => "A"
|
||||
|
||||
# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
|
||||
"\u018F" => "A"
|
||||
|
||||
# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
|
||||
"\u01CD" => "A"
|
||||
|
||||
# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
|
||||
"\u01DE" => "A"
|
||||
|
||||
# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
|
||||
"\u01E0" => "A"
|
||||
|
||||
# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
|
||||
"\u01FA" => "A"
|
||||
|
||||
# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
|
||||
"\u0200" => "A"
|
||||
|
||||
# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
|
||||
"\u0202" => "A"
|
||||
|
||||
# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
|
||||
"\u0226" => "A"
|
||||
|
||||
# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
|
||||
"\u023A" => "A"
|
||||
|
||||
# ᴀ [LATIN LETTER SMALL CAPITAL A]
|
||||
"\u1D00" => "A"
|
||||
|
||||
# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
|
||||
"\u1E00" => "A"
|
||||
|
||||
# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
|
||||
"\u1EA0" => "A"
|
||||
|
||||
# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
|
||||
"\u1EA2" => "A"
|
||||
|
||||
# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1EA4" => "A"
|
||||
|
||||
# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1EA6" => "A"
|
||||
|
||||
# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1EA8" => "A"
|
||||
|
||||
# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1EAA" => "A"
|
||||
|
||||
# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1EAC" => "A"
|
||||
|
||||
# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
|
||||
"\u1EAE" => "A"
|
||||
|
||||
# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
|
||||
"\u1EB0" => "A"
|
||||
|
||||
# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
|
||||
"\u1EB2" => "A"
|
||||
|
||||
# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
|
||||
"\u1EB4" => "A"
|
||||
|
||||
# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
|
||||
"\u1EB6" => "A"
|
||||
|
||||
# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
|
||||
"\u24B6" => "A"
|
||||
|
||||
# Ａ [FULLWIDTH LATIN CAPITAL LETTER A]
|
||||
"\uFF21" => "A"
|
||||
|
||||
# à [LATIN SMALL LETTER A WITH GRAVE]
|
||||
"\u00E0" => "a"
|
||||
|
||||
# á [LATIN SMALL LETTER A WITH ACUTE]
|
||||
"\u00E1" => "a"
|
||||
|
||||
# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
|
||||
"\u00E2" => "a"
|
||||
|
||||
# ã [LATIN SMALL LETTER A WITH TILDE]
|
||||
"\u00E3" => "a"
|
||||
|
||||
# ä [LATIN SMALL LETTER A WITH DIAERESIS]
|
||||
"\u00E4" => "a"
|
||||
|
||||
# å [LATIN SMALL LETTER A WITH RING ABOVE]
|
||||
"\u00E5" => "a"
|
||||
|
||||
# ā [LATIN SMALL LETTER A WITH MACRON]
|
||||
"\u0101" => "a"
|
||||
|
||||
# ă [LATIN SMALL LETTER A WITH BREVE]
|
||||
"\u0103" => "a"
|
||||
|
||||
# ą [LATIN SMALL LETTER A WITH OGONEK]
|
||||
"\u0105" => "a"
|
||||
|
||||
# ǎ [LATIN SMALL LETTER A WITH CARON]
|
||||
"\u01CE" => "a"
|
||||
|
||||
# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
|
||||
"\u01DF" => "a"
|
||||
|
||||
# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
|
||||
"\u01E1" => "a"
|
||||
|
||||
# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
|
||||
"\u01FB" => "a"
|
||||
|
||||
# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
|
||||
"\u0201" => "a"
|
||||
|
||||
# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
|
||||
"\u0203" => "a"
|
||||
|
||||
# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
|
||||
"\u0227" => "a"
|
||||
|
||||
# ɐ [LATIN SMALL LETTER TURNED A]
|
||||
"\u0250" => "a"
|
||||
|
||||
# ə [LATIN SMALL LETTER SCHWA]
|
||||
"\u0259" => "a"
|
||||
|
||||
# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
|
||||
"\u025A" => "a"
|
||||
|
||||
# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
|
||||
"\u1D8F" => "a"
|
||||
|
||||
# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
|
||||
"\u1D95" => "a"
|
||||
|
||||
# ḁ [LATIN SMALL LETTER A WITH RING BELOW]
|
||||
"\u1E01" => "a"
|
||||
|
||||
# ẚ [LATIN SMALL LETTER A WITH RIGHT HALF RING]
|
||||
"\u1E9A" => "a"
|
||||
|
||||
# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
|
||||
"\u1EA1" => "a"
|
||||
|
||||
# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
|
||||
"\u1EA3" => "a"
|
||||
|
||||
# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1EA5" => "a"
|
||||
|
||||
# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1EA7" => "a"
|
||||
|
||||
# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1EA9" => "a"
|
||||
|
||||
# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1EAB" => "a"
|
||||
|
||||
# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1EAD" => "a"
|
||||
|
||||
# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
|
||||
"\u1EAF" => "a"
|
||||
|
||||
# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
|
||||
"\u1EB1" => "a"
|
||||
|
||||
# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
|
||||
"\u1EB3" => "a"
|
||||
|
||||
# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
|
||||
"\u1EB5" => "a"
|
||||
|
||||
# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
|
||||
"\u1EB7" => "a"
|
||||
|
||||
# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
|
||||
"\u2090" => "a"
|
||||
|
||||
# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
|
||||
"\u2094" => "a"
|
||||
|
||||
# ⓐ [CIRCLED LATIN SMALL LETTER A]
|
||||
"\u24D0" => "a"
|
||||
|
||||
# ⱥ [LATIN SMALL LETTER A WITH STROKE]
|
||||
"\u2C65" => "a"
|
||||
|
||||
# Ɐ [LATIN CAPITAL LETTER TURNED A]
|
||||
"\u2C6F" => "a"
|
||||
|
||||
# ａ [FULLWIDTH LATIN SMALL LETTER A]
|
||||
"\uFF41" => "a"
|
||||
|
||||
# Ꜳ [LATIN CAPITAL LETTER AA]
|
||||
"\uA732" => "AA"
|
||||
|
||||
# Æ [LATIN CAPITAL LETTER AE]
|
||||
"\u00C6" => "AE"
|
||||
|
||||
# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
|
||||
"\u01E2" => "AE"
|
||||
|
||||
# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
|
||||
"\u01FC" => "AE"
|
||||
|
||||
# ᴁ [LATIN LETTER SMALL CAPITAL AE]
|
||||
"\u1D01" => "AE"
|
||||
|
||||
# Ꜵ [LATIN CAPITAL LETTER AO]
|
||||
"\uA734" => "AO"
|
||||
|
||||
# Ꜷ [LATIN CAPITAL LETTER AU]
|
||||
"\uA736" => "AU"
|
||||
|
||||
# Ꜹ [LATIN CAPITAL LETTER AV]
|
||||
"\uA738" => "AV"
|
||||
|
||||
# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
|
||||
"\uA73A" => "AV"
|
||||
|
||||
# Ꜽ [LATIN CAPITAL LETTER AY]
|
||||
"\uA73C" => "AY"
|
||||
|
||||
# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
|
||||
"\u249C" => "(a)"
|
||||
|
||||
# ꜳ [LATIN SMALL LETTER AA]
|
||||
"\uA733" => "aa"
|
||||
|
||||
# æ [LATIN SMALL LETTER AE]
|
||||
"\u00E6" => "ae"
|
||||
|
||||
# ǣ [LATIN SMALL LETTER AE WITH MACRON]
|
||||
"\u01E3" => "ae"
|
||||
|
||||
# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
|
||||
"\u01FD" => "ae"
|
||||
|
||||
# ᴂ [LATIN SMALL LETTER TURNED AE]
|
||||
"\u1D02" => "ae"
|
||||
|
||||
# ꜵ [LATIN SMALL LETTER AO]
|
||||
"\uA735" => "ao"
|
||||
|
||||
# ꜷ [LATIN SMALL LETTER AU]
|
||||
"\uA737" => "au"
|
||||
|
||||
# ꜹ [LATIN SMALL LETTER AV]
|
||||
"\uA739" => "av"
|
||||
|
||||
# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
|
||||
"\uA73B" => "av"
|
||||
|
||||
# ꜽ [LATIN SMALL LETTER AY]
|
||||
"\uA73D" => "ay"
|
||||
|
||||
# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
|
||||
"\u0181" => "B"
|
||||
|
||||
# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
|
||||
"\u0182" => "B"
|
||||
|
||||
# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
|
||||
"\u0243" => "B"
|
||||
|
||||
# ʙ [LATIN LETTER SMALL CAPITAL B]
|
||||
"\u0299" => "B"
|
||||
|
||||
# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
|
||||
"\u1D03" => "B"
|
||||
|
||||
# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
|
||||
"\u1E02" => "B"
|
||||
|
||||
# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
|
||||
"\u1E04" => "B"
|
||||
|
||||
# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
|
||||
"\u1E06" => "B"
|
||||
|
||||
# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
|
||||
"\u24B7" => "B"
|
||||
|
||||
# Ｂ [FULLWIDTH LATIN CAPITAL LETTER B]
|
||||
"\uFF22" => "B"
|
||||
|
||||
# ƀ [LATIN SMALL LETTER B WITH STROKE]
|
||||
"\u0180" => "b"
|
||||
|
||||
# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
|
||||
"\u0183" => "b"
|
||||
|
||||
# ɓ [LATIN SMALL LETTER B WITH HOOK]
|
||||
"\u0253" => "b"
|
||||
|
||||
# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
|
||||
"\u1D6C" => "b"
|
||||
|
||||
# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
|
||||
"\u1D80" => "b"
|
||||
|
||||
# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
|
||||
"\u1E03" => "b"
|
||||
|
||||
# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
|
||||
"\u1E05" => "b"
|
||||
|
||||
# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
|
||||
"\u1E07" => "b"
|
||||
|
||||
# ⓑ [CIRCLED LATIN SMALL LETTER B]
|
||||
"\u24D1" => "b"
|
||||
|
||||
# ｂ [FULLWIDTH LATIN SMALL LETTER B]
|
||||
"\uFF42" => "b"
|
||||
|
||||
# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
|
||||
"\u249D" => "(b)"
|
||||
|
||||
# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
|
||||
"\u00C7" => "C"
|
||||
|
||||
# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
|
||||
"\u0106" => "C"
|
||||
|
||||
# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
|
||||
"\u0108" => "C"
|
||||
|
||||
# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
|
||||
"\u010A" => "C"
|
||||
|
||||
# Č [LATIN CAPITAL LETTER C WITH CARON]
|
||||
"\u010C" => "C"
|
||||
|
||||
# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
|
||||
"\u0187" => "C"
|
||||
|
||||
# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
|
||||
"\u023B" => "C"
|
||||
|
||||
# ʗ [LATIN LETTER STRETCHED C]
|
||||
"\u0297" => "C"
|
||||
|
||||
# ᴄ [LATIN LETTER SMALL CAPITAL C]
|
||||
"\u1D04" => "C"
|
||||
|
||||
# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
|
||||
"\u1E08" => "C"
|
||||
|
||||
# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
|
||||
"\u24B8" => "C"
|
||||
|
||||
# Ｃ [FULLWIDTH LATIN CAPITAL LETTER C]
|
||||
"\uFF23" => "C"
|
||||
|
||||
# ç [LATIN SMALL LETTER C WITH CEDILLA]
|
||||
"\u00E7" => "c"
|
||||
|
||||
# ć [LATIN SMALL LETTER C WITH ACUTE]
|
||||
"\u0107" => "c"
|
||||
|
||||
# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
|
||||
"\u0109" => "c"
|
||||
|
||||
# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
|
||||
"\u010B" => "c"
|
||||
|
||||
# č [LATIN SMALL LETTER C WITH CARON]
|
||||
"\u010D" => "c"
|
||||
|
||||
# ƈ [LATIN SMALL LETTER C WITH HOOK]
|
||||
"\u0188" => "c"
|
||||
|
||||
# ȼ [LATIN SMALL LETTER C WITH STROKE]
|
||||
"\u023C" => "c"
|
||||
|
||||
# ɕ [LATIN SMALL LETTER C WITH CURL]
|
||||
"\u0255" => "c"
|
||||
|
||||
# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
|
||||
"\u1E09" => "c"
|
||||
|
||||
# ↄ [LATIN SMALL LETTER REVERSED C]
|
||||
"\u2184" => "c"
|
||||
|
||||
# ⓒ [CIRCLED LATIN SMALL LETTER C]
|
||||
"\u24D2" => "c"
|
||||
|
||||
# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
|
||||
"\uA73E" => "c"
|
||||
|
||||
# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
|
||||
"\uA73F" => "c"
|
||||
|
||||
# ｃ [FULLWIDTH LATIN SMALL LETTER C]
|
||||
"\uFF43" => "c"
|
||||
|
||||
# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
|
||||
"\u249E" => "(c)"
|
||||
|
||||
# Ð [LATIN CAPITAL LETTER ETH]
|
||||
"\u00D0" => "D"
|
||||
|
||||
# Ď [LATIN CAPITAL LETTER D WITH CARON]
|
||||
"\u010E" => "D"
|
||||
|
||||
# Đ [LATIN CAPITAL LETTER D WITH STROKE]
|
||||
"\u0110" => "D"
|
||||
|
||||
# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
|
||||
"\u0189" => "D"
|
||||
|
||||
# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
|
||||
"\u018A" => "D"
|
||||
|
||||
# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
|
||||
"\u018B" => "D"
|
||||
|
||||
# ᴅ [LATIN LETTER SMALL CAPITAL D]
|
||||
"\u1D05" => "D"
|
||||
|
||||
# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
|
||||
"\u1D06" => "D"
|
||||
|
||||
# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
|
||||
"\u1E0A" => "D"
|
||||
|
||||
# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
|
||||
"\u1E0C" => "D"
|
||||
|
||||
# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
|
||||
"\u1E0E" => "D"
|
||||
|
||||
# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
|
||||
"\u1E10" => "D"
|
||||
|
||||
# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
|
||||
"\u1E12" => "D"
|
||||
|
||||
# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
|
||||
"\u24B9" => "D"
|
||||
|
||||
# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
|
||||
"\uA779" => "D"
|
||||
|
||||
# Ｄ [FULLWIDTH LATIN CAPITAL LETTER D]
|
||||
"\uFF24" => "D"
|
||||
|
||||
# ð [LATIN SMALL LETTER ETH]
|
||||
"\u00F0" => "d"
|
||||
|
||||
# ď [LATIN SMALL LETTER D WITH CARON]
|
||||
"\u010F" => "d"
|
||||
|
||||
# đ [LATIN SMALL LETTER D WITH STROKE]
|
||||
"\u0111" => "d"
|
||||
|
||||
# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
|
||||
"\u018C" => "d"
|
||||
|
||||
# ȡ [LATIN SMALL LETTER D WITH CURL]
|
||||
"\u0221" => "d"
|
||||
|
||||
# ɖ [LATIN SMALL LETTER D WITH TAIL]
|
||||
"\u0256" => "d"
|
||||
|
||||
# ɗ [LATIN SMALL LETTER D WITH HOOK]
|
||||
"\u0257" => "d"
|
||||
|
||||
# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
|
||||
"\u1D6D" => "d"
|
||||
|
||||
# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
|
||||
"\u1D81" => "d"
|
||||
|
||||
# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
|
||||
"\u1D91" => "d"
|
||||
|
||||
# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
|
||||
"\u1E0B" => "d"
|
||||
|
||||
# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
|
||||
"\u1E0D" => "d"
|
||||
|
||||
# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
|
||||
"\u1E0F" => "d"
|
||||
|
||||
# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
|
||||
"\u1E11" => "d"
|
||||
|
||||
# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
|
||||
"\u1E13" => "d"
|
||||
|
||||
# ⓓ [CIRCLED LATIN SMALL LETTER D]
|
||||
"\u24D3" => "d"
|
||||
|
||||
# ꝺ [LATIN SMALL LETTER INSULAR D]
|
||||
"\uA77A" => "d"
|
||||
|
||||
# ｄ [FULLWIDTH LATIN SMALL LETTER D]
|
||||
"\uFF44" => "d"
|
||||
|
||||
# Ǆ [LATIN CAPITAL LETTER DZ WITH CARON]
|
||||
"\u01C4" => "DZ"
|
||||
|
||||
# Ǳ [LATIN CAPITAL LETTER DZ]
|
||||
"\u01F1" => "DZ"
|
||||
|
||||
# ǅ [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
|
||||
"\u01C5" => "Dz"
|
||||
|
||||
# ǲ [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
|
||||
"\u01F2" => "Dz"
|
||||
|
||||
# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
|
||||
"\u249F" => "(d)"
|
||||
|
||||
# ȸ [LATIN SMALL LETTER DB DIGRAPH]
|
||||
"\u0238" => "db"
|
||||
|
||||
# ǆ [LATIN SMALL LETTER DZ WITH CARON]
|
||||
"\u01C6" => "dz"
|
||||
|
||||
# dz [LATIN SMALL LETTER DZ]
|
||||
"\u01F3" => "dz"
|
||||
|
||||
# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
|
||||
"\u02A3" => "dz"
|
||||
|
||||
# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
|
||||
"\u02A5" => "dz"
|
||||
|
||||
# È [LATIN CAPITAL LETTER E WITH GRAVE]
|
||||
"\u00C8" => "E"
|
||||
|
||||
# É [LATIN CAPITAL LETTER E WITH ACUTE]
|
||||
"\u00C9" => "E"
|
||||
|
||||
# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
|
||||
"\u00CA" => "E"
|
||||
|
||||
# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
|
||||
"\u00CB" => "E"
|
||||
|
||||
# Ē [LATIN CAPITAL LETTER E WITH MACRON]
|
||||
"\u0112" => "E"
|
||||
|
||||
# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
|
||||
"\u0114" => "E"
|
||||
|
||||
# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
|
||||
"\u0116" => "E"
|
||||
|
||||
# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
|
||||
"\u0118" => "E"
|
||||
|
||||
# Ě [LATIN CAPITAL LETTER E WITH CARON]
|
||||
"\u011A" => "E"
|
||||
|
||||
# Ǝ [LATIN CAPITAL LETTER REVERSED E]
|
||||
"\u018E" => "E"
|
||||
|
||||
# Ɛ [LATIN CAPITAL LETTER OPEN E]
|
||||
"\u0190" => "E"
|
||||
|
||||
# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
|
||||
"\u0204" => "E"
|
||||
|
||||
# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
|
||||
"\u0206" => "E"
|
||||
|
||||
# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
|
||||
"\u0228" => "E"
|
||||
|
||||
# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
|
||||
"\u0246" => "E"
|
||||
|
||||
# ᴇ [LATIN LETTER SMALL CAPITAL E]
|
||||
"\u1D07" => "E"
|
||||
|
||||
# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
|
||||
"\u1E14" => "E"
|
||||
|
||||
# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
|
||||
"\u1E16" => "E"
|
||||
|
||||
# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
|
||||
"\u1E18" => "E"
|
||||
|
||||
# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
|
||||
"\u1E1A" => "E"
|
||||
|
||||
# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
|
||||
"\u1E1C" => "E"
|
||||
|
||||
# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
|
||||
"\u1EB8" => "E"
|
||||
|
||||
# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
|
||||
"\u1EBA" => "E"
|
||||
|
||||
# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
|
||||
"\u1EBC" => "E"
|
||||
|
||||
# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1EBE" => "E"
|
||||
|
||||
# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1EC0" => "E"
|
||||
|
||||
# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1EC2" => "E"
|
||||
|
||||
# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1EC4" => "E"
|
||||
|
||||
# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1EC6" => "E"
|
||||
|
||||
# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
|
||||
"\u24BA" => "E"
|
||||
|
||||
# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
|
||||
"\u2C7B" => "E"
|
||||
|
||||
# E [FULLWIDTH LATIN CAPITAL LETTER E]
|
||||
"\uFF25" => "E"
|
||||
|
||||
# è [LATIN SMALL LETTER E WITH GRAVE]
|
||||
"\u00E8" => "e"
|
||||
|
||||
# é [LATIN SMALL LETTER E WITH ACUTE]
|
||||
"\u00E9" => "e"
|
||||
|
||||
# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
|
||||
"\u00EA" => "e"
|
||||
|
||||
# ë [LATIN SMALL LETTER E WITH DIAERESIS]
|
||||
"\u00EB" => "e"
|
||||
|
||||
# ē [LATIN SMALL LETTER E WITH MACRON]
|
||||
"\u0113" => "e"
|
||||
|
||||
# ĕ [LATIN SMALL LETTER E WITH BREVE]
|
||||
"\u0115" => "e"
|
||||
|
||||
# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
|
||||
"\u0117" => "e"
|
||||
|
||||
# ę [LATIN SMALL LETTER E WITH OGONEK]
|
||||
"\u0119" => "e"
|
||||
|
||||
# ě [LATIN SMALL LETTER E WITH CARON]
|
||||
"\u011B" => "e"
|
||||
|
||||
# ǝ [LATIN SMALL LETTER TURNED E]
|
||||
"\u01DD" => "e"
|
||||
|
||||
# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
|
||||
"\u0205" => "e"
|
||||
|
||||
# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
|
||||
"\u0207" => "e"
|
||||
|
||||
# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
|
||||
"\u0229" => "e"
|
||||
|
||||
# ɇ [LATIN SMALL LETTER E WITH STROKE]
|
||||
"\u0247" => "e"
|
||||
|
||||
# ɘ [LATIN SMALL LETTER REVERSED E]
|
||||
"\u0258" => "e"
|
||||
|
||||
# ɛ [LATIN SMALL LETTER OPEN E]
|
||||
"\u025B" => "e"
|
||||
|
||||
# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
|
||||
"\u025C" => "e"
|
||||
|
||||
# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
|
||||
"\u025D" => "e"
|
||||
|
||||
# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
|
||||
"\u025E" => "e"
|
||||
|
||||
# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
|
||||
"\u029A" => "e"
|
||||
|
||||
# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
|
||||
"\u1D08" => "e"
|
||||
|
||||
# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
|
||||
"\u1D92" => "e"
|
||||
|
||||
# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
|
||||
"\u1D93" => "e"
|
||||
|
||||
# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
|
||||
"\u1D94" => "e"
|
||||
|
||||
# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
|
||||
"\u1E15" => "e"
|
||||
|
||||
# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
|
||||
"\u1E17" => "e"
|
||||
|
||||
# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
|
||||
"\u1E19" => "e"
|
||||
|
||||
# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
|
||||
"\u1E1B" => "e"
|
||||
|
||||
# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
|
||||
"\u1E1D" => "e"
|
||||
|
||||
# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
|
||||
"\u1EB9" => "e"
|
||||
|
||||
# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
|
||||
"\u1EBB" => "e"
|
||||
|
||||
# ẽ [LATIN SMALL LETTER E WITH TILDE]
|
||||
"\u1EBD" => "e"
|
||||
|
||||
# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1EBF" => "e"
|
||||
|
||||
# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1EC1" => "e"
|
||||
|
||||
# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1EC3" => "e"
|
||||
|
||||
# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1EC5" => "e"
|
||||
|
||||
# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1EC7" => "e"
|
||||
|
||||
# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
|
||||
"\u2091" => "e"
|
||||
|
||||
# ⓔ [CIRCLED LATIN SMALL LETTER E]
|
||||
"\u24D4" => "e"
|
||||
|
||||
# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
|
||||
"\u2C78" => "e"
|
||||
|
||||
# e [FULLWIDTH LATIN SMALL LETTER E]
|
||||
"\uFF45" => "e"
|
||||
|
||||
# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
|
||||
"\u24A0" => "(e)"
|
||||
|
||||
# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
|
||||
"\u0191" => "F"
|
||||
|
||||
# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
|
||||
"\u1E1E" => "F"
|
||||
|
||||
# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
|
||||
"\u24BB" => "F"
|
||||
|
||||
# ꜰ [LATIN LETTER SMALL CAPITAL F]
|
||||
"\uA730" => "F"
|
||||
|
||||
# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
|
||||
"\uA77B" => "F"
|
||||
|
||||
# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
|
||||
"\uA7FB" => "F"
|
||||
|
||||
# F [FULLWIDTH LATIN CAPITAL LETTER F]
|
||||
"\uFF26" => "F"
|
||||
|
||||
# ƒ [LATIN SMALL LETTER F WITH HOOK]
|
||||
"\u0192" => "f"
|
||||
|
||||
# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
|
||||
"\u1D6E" => "f"
|
||||
|
||||
# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
|
||||
"\u1D82" => "f"
|
||||
|
||||
# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
|
||||
"\u1E1F" => "f"
|
||||
|
||||
# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
|
||||
"\u1E9B" => "f"
|
||||
|
||||
# ⓕ [CIRCLED LATIN SMALL LETTER F]
|
||||
"\u24D5" => "f"
|
||||
|
||||
# ꝼ [LATIN SMALL LETTER INSULAR F]
|
||||
"\uA77C" => "f"
|
||||
|
||||
# f [FULLWIDTH LATIN SMALL LETTER F]
|
||||
"\uFF46" => "f"
|
||||
|
||||
# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
|
||||
"\u24A1" => "(f)"
|
||||
|
||||
# ff [LATIN SMALL LIGATURE FF]
|
||||
"\uFB00" => "ff"
|
||||
|
||||
# ffi [LATIN SMALL LIGATURE FFI]
|
||||
"\uFB03" => "ffi"
|
||||
|
||||
# ffl [LATIN SMALL LIGATURE FFL]
|
||||
"\uFB04" => "ffl"
|
||||
|
||||
# fi [LATIN SMALL LIGATURE FI]
|
||||
"\uFB01" => "fi"
|
||||
|
||||
# fl [LATIN SMALL LIGATURE FL]
|
||||
"\uFB02" => "fl"
|
||||
|
||||
# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
|
||||
"\u011C" => "G"
|
||||
|
||||
# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
|
||||
"\u011E" => "G"
|
||||
|
||||
# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
|
||||
"\u0120" => "G"
|
||||
|
||||
# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
|
||||
"\u0122" => "G"
|
||||
|
||||
# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
|
||||
"\u0193" => "G"
|
||||
|
||||
# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
|
||||
"\u01E4" => "G"
|
||||
|
||||
# ǥ [LATIN SMALL LETTER G WITH STROKE]
|
||||
"\u01E5" => "G"
|
||||
|
||||
# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
|
||||
"\u01E6" => "G"
|
||||
|
||||
# ǧ [LATIN SMALL LETTER G WITH CARON]
|
||||
"\u01E7" => "G"
|
||||
|
||||
# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
|
||||
"\u01F4" => "G"
|
||||
|
||||
# ɢ [LATIN LETTER SMALL CAPITAL G]
|
||||
"\u0262" => "G"
|
||||
|
||||
# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
|
||||
"\u029B" => "G"
|
||||
|
||||
# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
|
||||
"\u1E20" => "G"
|
||||
|
||||
# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
|
||||
"\u24BC" => "G"
|
||||
|
||||
# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
|
||||
"\uA77D" => "G"
|
||||
|
||||
# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
|
||||
"\uA77E" => "G"
|
||||
|
||||
# G [FULLWIDTH LATIN CAPITAL LETTER G]
|
||||
"\uFF27" => "G"
|
||||
|
||||
# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
|
||||
"\u011D" => "g"
|
||||
|
||||
# ğ [LATIN SMALL LETTER G WITH BREVE]
|
||||
"\u011F" => "g"
|
||||
|
||||
# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
|
||||
"\u0121" => "g"
|
||||
|
||||
# ģ [LATIN SMALL LETTER G WITH CEDILLA]
|
||||
"\u0123" => "g"
|
||||
|
||||
# ǵ [LATIN SMALL LETTER G WITH ACUTE]
|
||||
"\u01F5" => "g"
|
||||
|
||||
# ɠ [LATIN SMALL LETTER G WITH HOOK]
|
||||
"\u0260" => "g"
|
||||
|
||||
# ɡ [LATIN SMALL LETTER SCRIPT G]
|
||||
"\u0261" => "g"
|
||||
|
||||
# ᵷ [LATIN SMALL LETTER TURNED G]
|
||||
"\u1D77" => "g"
|
||||
|
||||
# ᵹ [LATIN SMALL LETTER INSULAR G]
|
||||
"\u1D79" => "g"
|
||||
|
||||
# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
|
||||
"\u1D83" => "g"
|
||||
|
||||
# ḡ [LATIN SMALL LETTER G WITH MACRON]
|
||||
"\u1E21" => "g"
|
||||
|
||||
# ⓖ [CIRCLED LATIN SMALL LETTER G]
|
||||
"\u24D6" => "g"
|
||||
|
||||
# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
|
||||
"\uA77F" => "g"
|
||||
|
||||
# g [FULLWIDTH LATIN SMALL LETTER G]
|
||||
"\uFF47" => "g"
|
||||
|
||||
# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
|
||||
"\u24A2" => "(g)"
|
||||
|
||||
# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
|
||||
"\u0124" => "H"
|
||||
|
||||
# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
|
||||
"\u0126" => "H"
|
||||
|
||||
# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
|
||||
"\u021E" => "H"
|
||||
|
||||
# ʜ [LATIN LETTER SMALL CAPITAL H]
|
||||
"\u029C" => "H"
|
||||
|
||||
# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
|
||||
"\u1E22" => "H"
|
||||
|
||||
# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
|
||||
"\u1E24" => "H"
|
||||
|
||||
# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
|
||||
"\u1E26" => "H"
|
||||
|
||||
# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
|
||||
"\u1E28" => "H"
|
||||
|
||||
# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
|
||||
"\u1E2A" => "H"
|
||||
|
||||
# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
|
||||
"\u24BD" => "H"
|
||||
|
||||
# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
|
||||
"\u2C67" => "H"
|
||||
|
||||
# Ⱶ [LATIN CAPITAL LETTER HALF H]
|
||||
"\u2C75" => "H"
|
||||
|
||||
# H [FULLWIDTH LATIN CAPITAL LETTER H]
|
||||
"\uFF28" => "H"
|
||||
|
||||
# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
|
||||
"\u0125" => "h"
|
||||
|
||||
# ħ [LATIN SMALL LETTER H WITH STROKE]
|
||||
"\u0127" => "h"
|
||||
|
||||
# ȟ [LATIN SMALL LETTER H WITH CARON]
|
||||
"\u021F" => "h"
|
||||
|
||||
# ɥ [LATIN SMALL LETTER TURNED H]
|
||||
"\u0265" => "h"
|
||||
|
||||
# ɦ [LATIN SMALL LETTER H WITH HOOK]
|
||||
"\u0266" => "h"
|
||||
|
||||
# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
|
||||
"\u02AE" => "h"
|
||||
|
||||
# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
|
||||
"\u02AF" => "h"
|
||||
|
||||
# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
|
||||
"\u1E23" => "h"
|
||||
|
||||
# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
|
||||
"\u1E25" => "h"
|
||||
|
||||
# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
|
||||
"\u1E27" => "h"
|
||||
|
||||
# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
|
||||
"\u1E29" => "h"
|
||||
|
||||
# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
|
||||
"\u1E2B" => "h"
|
||||
|
||||
# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
|
||||
"\u1E96" => "h"
|
||||
|
||||
# ⓗ [CIRCLED LATIN SMALL LETTER H]
|
||||
"\u24D7" => "h"
|
||||
|
||||
# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
|
||||
"\u2C68" => "h"
|
||||
|
||||
# ⱶ [LATIN SMALL LETTER HALF H]
|
||||
"\u2C76" => "h"
|
||||
|
||||
# h [FULLWIDTH LATIN SMALL LETTER H]
|
||||
"\uFF48" => "h"
|
||||
|
||||
# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
|
||||
"\u01F6" => "HV"
|
||||
|
||||
# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
|
||||
"\u24A3" => "(h)"
|
||||
|
||||
# ƕ [LATIN SMALL LETTER HV]
|
||||
"\u0195" => "hv"
|
||||
|
||||
# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
|
||||
"\u00CC" => "I"
|
||||
|
||||
# Í [LATIN CAPITAL LETTER I WITH ACUTE]
|
||||
"\u00CD" => "I"
|
||||
|
||||
# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
|
||||
"\u00CE" => "I"
|
||||
|
||||
# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
|
||||
"\u00CF" => "I"
|
||||
|
||||
# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
|
||||
"\u0128" => "I"
|
||||
|
||||
# Ī [LATIN CAPITAL LETTER I WITH MACRON]
|
||||
"\u012A" => "I"
|
||||
|
||||
# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
|
||||
"\u012C" => "I"
|
||||
|
||||
# Į [LATIN CAPITAL LETTER I WITH OGONEK]
|
||||
"\u012E" => "I"
|
||||
|
||||
# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
|
||||
"\u0130" => "I"
|
||||
|
||||
# Ɩ [LATIN CAPITAL LETTER IOTA]
|
||||
"\u0196" => "I"
|
||||
|
||||
# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
|
||||
"\u0197" => "I"
|
||||
|
||||
# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
|
||||
"\u01CF" => "I"
|
||||
|
||||
# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
|
||||
"\u0208" => "I"
|
||||
|
||||
# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
|
||||
"\u020A" => "I"
|
||||
|
||||
# ɪ [LATIN LETTER SMALL CAPITAL I]
|
||||
"\u026A" => "I"
|
||||
|
||||
# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
|
||||
"\u1D7B" => "I"
|
||||
|
||||
# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
|
||||
"\u1E2C" => "I"
|
||||
|
||||
# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
|
||||
"\u1E2E" => "I"
|
||||
|
||||
# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
|
||||
"\u1EC8" => "I"
|
||||
|
||||
# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
|
||||
"\u1ECA" => "I"
|
||||
|
||||
# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
|
||||
"\u24BE" => "I"
|
||||
|
||||
# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
|
||||
"\uA7FE" => "I"
|
||||
|
||||
# I [FULLWIDTH LATIN CAPITAL LETTER I]
|
||||
"\uFF29" => "I"
|
||||
|
||||
# ì [LATIN SMALL LETTER I WITH GRAVE]
|
||||
"\u00EC" => "i"
|
||||
|
||||
# í [LATIN SMALL LETTER I WITH ACUTE]
|
||||
"\u00ED" => "i"
|
||||
|
||||
# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
|
||||
"\u00EE" => "i"
|
||||
|
||||
# ï [LATIN SMALL LETTER I WITH DIAERESIS]
|
||||
"\u00EF" => "i"
|
||||
|
||||
# ĩ [LATIN SMALL LETTER I WITH TILDE]
|
||||
"\u0129" => "i"
|
||||
|
||||
# ī [LATIN SMALL LETTER I WITH MACRON]
|
||||
"\u012B" => "i"
|
||||
|
||||
# ĭ [LATIN SMALL LETTER I WITH BREVE]
|
||||
"\u012D" => "i"
|
||||
|
||||
# į [LATIN SMALL LETTER I WITH OGONEK]
|
||||
"\u012F" => "i"
|
||||
|
||||
# ı [LATIN SMALL LETTER DOTLESS I]
|
||||
"\u0131" => "i"
|
||||
|
||||
# ǐ [LATIN SMALL LETTER I WITH CARON]
|
||||
"\u01D0" => "i"
|
||||
|
||||
# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
|
||||
"\u0209" => "i"
|
||||
|
||||
# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
|
||||
"\u020B" => "i"
|
||||
|
||||
# ɨ [LATIN SMALL LETTER I WITH STROKE]
|
||||
"\u0268" => "i"
|
||||
|
||||
# ᴉ [LATIN SMALL LETTER TURNED I]
|
||||
"\u1D09" => "i"
|
||||
|
||||
# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
|
||||
"\u1D62" => "i"
|
||||
|
||||
# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
|
||||
"\u1D7C" => "i"
|
||||
|
||||
# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
|
||||
"\u1D96" => "i"
|
||||
|
||||
# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
|
||||
"\u1E2D" => "i"
|
||||
|
||||
# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
|
||||
"\u1E2F" => "i"
|
||||
|
||||
# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
|
||||
"\u1EC9" => "i"
|
||||
|
||||
# ị [LATIN SMALL LETTER I WITH DOT BELOW]
|
||||
"\u1ECB" => "i"
|
||||
|
||||
# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
|
||||
"\u2071" => "i"
|
||||
|
||||
# ⓘ [CIRCLED LATIN SMALL LETTER I]
|
||||
"\u24D8" => "i"
|
||||
|
||||
# i [FULLWIDTH LATIN SMALL LETTER I]
|
||||
"\uFF49" => "i"
|
||||
|
||||
# IJ [LATIN CAPITAL LIGATURE IJ]
|
||||
"\u0132" => "IJ"
|
||||
|
||||
# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
|
||||
"\u24A4" => "(i)"
|
||||
|
||||
# ij [LATIN SMALL LIGATURE IJ]
|
||||
"\u0133" => "ij"
|
||||
|
||||
# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
|
||||
"\u0134" => "J"
|
||||
|
||||
# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
|
||||
"\u0248" => "J"
|
||||
|
||||
# ᴊ [LATIN LETTER SMALL CAPITAL J]
|
||||
"\u1D0A" => "J"
|
||||
|
||||
# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
|
||||
"\u24BF" => "J"
|
||||
|
||||
# J [FULLWIDTH LATIN CAPITAL LETTER J]
|
||||
"\uFF2A" => "J"
|
||||
|
||||
# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
|
||||
"\u0135" => "j"
|
||||
|
||||
# ǰ [LATIN SMALL LETTER J WITH CARON]
|
||||
"\u01F0" => "j"
|
||||
|
||||
# ȷ [LATIN SMALL LETTER DOTLESS J]
|
||||
"\u0237" => "j"
|
||||
|
||||
# ɉ [LATIN SMALL LETTER J WITH STROKE]
|
||||
"\u0249" => "j"
|
||||
|
||||
# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
|
||||
"\u025F" => "j"
|
||||
|
||||
# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
|
||||
"\u0284" => "j"
|
||||
|
||||
# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
|
||||
"\u029D" => "j"
|
||||
|
||||
# ⓙ [CIRCLED LATIN SMALL LETTER J]
|
||||
"\u24D9" => "j"
|
||||
|
||||
# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
|
||||
"\u2C7C" => "j"
|
||||
|
||||
# j [FULLWIDTH LATIN SMALL LETTER J]
|
||||
"\uFF4A" => "j"
|
||||
|
||||
# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
|
||||
"\u24A5" => "(j)"
|
||||
|
||||
# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
|
||||
"\u0136" => "K"
|
||||
|
||||
# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
|
||||
"\u0198" => "K"
|
||||
|
||||
# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
|
||||
"\u01E8" => "K"
|
||||
|
||||
# ᴋ [LATIN LETTER SMALL CAPITAL K]
|
||||
"\u1D0B" => "K"
|
||||
|
||||
# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
|
||||
"\u1E30" => "K"
|
||||
|
||||
# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
|
||||
"\u1E32" => "K"
|
||||
|
||||
# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
|
||||
"\u1E34" => "K"
|
||||
|
||||
# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
|
||||
"\u24C0" => "K"
|
||||
|
||||
# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
|
||||
"\u2C69" => "K"
|
||||
|
||||
# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
|
||||
"\uA740" => "K"
|
||||
|
||||
# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
|
||||
"\uA742" => "K"
|
||||
|
||||
# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
|
||||
"\uA744" => "K"
|
||||
|
||||
# K [FULLWIDTH LATIN CAPITAL LETTER K]
|
||||
"\uFF2B" => "K"
|
||||
|
||||
# ķ [LATIN SMALL LETTER K WITH CEDILLA]
|
||||
"\u0137" => "k"
|
||||
|
||||
# ƙ [LATIN SMALL LETTER K WITH HOOK]
|
||||
"\u0199" => "k"
|
||||
|
||||
# ǩ [LATIN SMALL LETTER K WITH CARON]
|
||||
"\u01E9" => "k"
|
||||
|
||||
# ʞ [LATIN SMALL LETTER TURNED K]
|
||||
"\u029E" => "k"
|
||||
|
||||
# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
|
||||
"\u1D84" => "k"
|
||||
|
||||
# ḱ [LATIN SMALL LETTER K WITH ACUTE]
|
||||
"\u1E31" => "k"
|
||||
|
||||
# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
|
||||
"\u1E33" => "k"
|
||||
|
||||
# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
|
||||
"\u1E35" => "k"
|
||||
|
||||
# ⓚ [CIRCLED LATIN SMALL LETTER K]
|
||||
"\u24DA" => "k"
|
||||
|
||||
# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
|
||||
"\u2C6A" => "k"
|
||||
|
||||
# ꝁ [LATIN SMALL LETTER K WITH STROKE]
|
||||
"\uA741" => "k"
|
||||
|
||||
# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
|
||||
"\uA743" => "k"
|
||||
|
||||
# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
|
||||
"\uA745" => "k"
|
||||
|
||||
# k [FULLWIDTH LATIN SMALL LETTER K]
|
||||
"\uFF4B" => "k"
|
||||
|
||||
# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
|
||||
"\u24A6" => "(k)"
|
||||
|
||||
# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
|
||||
"\u0139" => "L"
|
||||
|
||||
# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
|
||||
"\u013B" => "L"
|
||||
|
||||
# Ľ [LATIN CAPITAL LETTER L WITH CARON]
|
||||
"\u013D" => "L"
|
||||
|
||||
# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
|
||||
"\u013F" => "L"
|
||||
|
||||
# Ł [LATIN CAPITAL LETTER L WITH STROKE]
|
||||
"\u0141" => "L"
|
||||
|
||||
# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
|
||||
"\u023D" => "L"
|
||||
|
||||
# ʟ [LATIN LETTER SMALL CAPITAL L]
|
||||
"\u029F" => "L"
|
||||
|
||||
# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
|
||||
"\u1D0C" => "L"
|
||||
|
||||
# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
|
||||
"\u1E36" => "L"
|
||||
|
||||
# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
|
||||
"\u1E38" => "L"
|
||||
|
||||
# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
|
||||
"\u1E3A" => "L"
|
||||
|
||||
# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
|
||||
"\u1E3C" => "L"
|
||||
|
||||
# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
|
||||
"\u24C1" => "L"
|
||||
|
||||
# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
|
||||
"\u2C60" => "L"
|
||||
|
||||
# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
|
||||
"\u2C62" => "L"
|
||||
|
||||
# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
|
||||
"\uA746" => "L"
|
||||
|
||||
# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
|
||||
"\uA748" => "L"
|
||||
|
||||
# Ꞁ [LATIN CAPITAL LETTER TURNED L]
|
||||
"\uA780" => "L"
|
||||
|
||||
# L [FULLWIDTH LATIN CAPITAL LETTER L]
|
||||
"\uFF2C" => "L"
|
||||
|
||||
# ĺ [LATIN SMALL LETTER L WITH ACUTE]
|
||||
"\u013A" => "l"
|
||||
|
||||
# ļ [LATIN SMALL LETTER L WITH CEDILLA]
|
||||
"\u013C" => "l"
|
||||
|
||||
# ľ [LATIN SMALL LETTER L WITH CARON]
|
||||
"\u013E" => "l"
|
||||
|
||||
# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
|
||||
"\u0140" => "l"
|
||||
|
||||
# ł [LATIN SMALL LETTER L WITH STROKE]
|
||||
"\u0142" => "l"
|
||||
|
||||
# ƚ [LATIN SMALL LETTER L WITH BAR]
|
||||
"\u019A" => "l"
|
||||
|
||||
# ȴ [LATIN SMALL LETTER L WITH CURL]
|
||||
"\u0234" => "l"
|
||||
|
||||
# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
|
||||
"\u026B" => "l"
|
||||
|
||||
# ɬ [LATIN SMALL LETTER L WITH BELT]
|
||||
"\u026C" => "l"
|
||||
|
||||
# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
|
||||
"\u026D" => "l"
|
||||
|
||||
# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
|
||||
"\u1D85" => "l"
|
||||
|
||||
# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
|
||||
"\u1E37" => "l"
|
||||
|
||||
# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
|
||||
"\u1E39" => "l"
|
||||
|
||||
# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
|
||||
"\u1E3B" => "l"
|
||||
|
||||
# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
|
||||
"\u1E3D" => "l"
|
||||
|
||||
# ⓛ [CIRCLED LATIN SMALL LETTER L]
|
||||
"\u24DB" => "l"
|
||||
|
||||
# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
|
||||
"\u2C61" => "l"
|
||||
|
||||
# ꝇ [LATIN SMALL LETTER BROKEN L]
|
||||
"\uA747" => "l"
|
||||
|
||||
# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
|
||||
"\uA749" => "l"
|
||||
|
||||
# ꞁ [LATIN SMALL LETTER TURNED L]
|
||||
"\uA781" => "l"
|
||||
|
||||
# l [FULLWIDTH LATIN SMALL LETTER L]
|
||||
"\uFF4C" => "l"
|
||||
|
||||
# LJ [LATIN CAPITAL LETTER LJ]
|
||||
"\u01C7" => "LJ"
|
||||
|
||||
# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
|
||||
"\u1EFA" => "LL"
|
||||
|
||||
# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
|
||||
"\u01C8" => "Lj"
|
||||
|
||||
# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
|
||||
"\u24A7" => "(l)"
|
||||
|
||||
# lj [LATIN SMALL LETTER LJ]
|
||||
"\u01C9" => "lj"
|
||||
|
||||
# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
|
||||
"\u1EFB" => "ll"
|
||||
|
||||
# ʪ [LATIN SMALL LETTER LS DIGRAPH]
|
||||
"\u02AA" => "ls"
|
||||
|
||||
# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
|
||||
"\u02AB" => "lz"
|
||||
|
||||
# Ɯ [LATIN CAPITAL LETTER TURNED M]
|
||||
"\u019C" => "M"
|
||||
|
||||
# ᴍ [LATIN LETTER SMALL CAPITAL M]
|
||||
"\u1D0D" => "M"
|
||||
|
||||
# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
|
||||
"\u1E3E" => "M"
|
||||
|
||||
# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
|
||||
"\u1E40" => "M"
|
||||
|
||||
# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
|
||||
"\u1E42" => "M"
|
||||
|
||||
# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
|
||||
"\u24C2" => "M"
|
||||
|
||||
# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
|
||||
"\u2C6E" => "M"
|
||||
|
||||
# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
|
||||
"\uA7FD" => "M"
|
||||
|
||||
# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
|
||||
"\uA7FF" => "M"
|
||||
|
||||
# M [FULLWIDTH LATIN CAPITAL LETTER M]
|
||||
"\uFF2D" => "M"
|
||||
|
||||
# ɯ [LATIN SMALL LETTER TURNED M]
|
||||
"\u026F" => "m"
|
||||
|
||||
# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
|
||||
"\u0270" => "m"
|
||||
|
||||
# ɱ [LATIN SMALL LETTER M WITH HOOK]
|
||||
"\u0271" => "m"
|
||||
|
||||
# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
|
||||
"\u1D6F" => "m"
|
||||
|
||||
# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
|
||||
"\u1D86" => "m"
|
||||
|
||||
# ḿ [LATIN SMALL LETTER M WITH ACUTE]
|
||||
"\u1E3F" => "m"
|
||||
|
||||
# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
|
||||
"\u1E41" => "m"
|
||||
|
||||
# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
|
||||
"\u1E43" => "m"
|
||||
|
||||
# ⓜ [CIRCLED LATIN SMALL LETTER M]
|
||||
"\u24DC" => "m"
|
||||
|
||||
# m [FULLWIDTH LATIN SMALL LETTER M]
|
||||
"\uFF4D" => "m"
|
||||
|
||||
# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
|
||||
"\u24A8" => "(m)"
|
||||
|
||||
# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
|
||||
"\u00D1" => "N"
|
||||
|
||||
# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
|
||||
"\u0143" => "N"
|
||||
|
||||
# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
|
||||
"\u0145" => "N"
|
||||
|
||||
# Ň [LATIN CAPITAL LETTER N WITH CARON]
|
||||
"\u0147" => "N"
|
||||
|
||||
# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
|
||||
"\u014A" => "N"
|
||||
|
||||
# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
|
||||
"\u019D" => "N"
|
||||
|
||||
# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
|
||||
"\u01F8" => "N"
|
||||
|
||||
# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
|
||||
"\u0220" => "N"
|
||||
|
||||
# ɴ [LATIN LETTER SMALL CAPITAL N]
|
||||
"\u0274" => "N"
|
||||
|
||||
# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
|
||||
"\u1D0E" => "N"
|
||||
|
||||
# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
|
||||
"\u1E44" => "N"
|
||||
|
||||
# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
|
||||
"\u1E46" => "N"
|
||||
|
||||
# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
|
||||
"\u1E48" => "N"
|
||||
|
||||
# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
|
||||
"\u1E4A" => "N"
|
||||
|
||||
# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
|
||||
"\u24C3" => "N"
|
||||
|
||||
# N [FULLWIDTH LATIN CAPITAL LETTER N]
|
||||
"\uFF2E" => "N"
|
||||
|
||||
# ñ [LATIN SMALL LETTER N WITH TILDE]
|
||||
"\u00F1" => "n"
|
||||
|
||||
# ń [LATIN SMALL LETTER N WITH ACUTE]
|
||||
"\u0144" => "n"
|
||||
|
||||
# ņ [LATIN SMALL LETTER N WITH CEDILLA]
|
||||
"\u0146" => "n"
|
||||
|
||||
# ň [LATIN SMALL LETTER N WITH CARON]
|
||||
"\u0148" => "n"
|
||||
|
||||
# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
|
||||
"\u0149" => "n"
|
||||
|
||||
# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
|
||||
"\u014B" => "n"
|
||||
|
||||
# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
|
||||
"\u019E" => "n"
|
||||
|
||||
# ǹ [LATIN SMALL LETTER N WITH GRAVE]
|
||||
"\u01F9" => "n"
|
||||
|
||||
# ȵ [LATIN SMALL LETTER N WITH CURL]
|
||||
"\u0235" => "n"
|
||||
|
||||
# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
|
||||
"\u0272" => "n"
|
||||
|
||||
# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
|
||||
"\u0273" => "n"
|
||||
|
||||
# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
|
||||
"\u1D70" => "n"
|
||||
|
||||
# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
|
||||
"\u1D87" => "n"
|
||||
|
||||
# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
|
||||
"\u1E45" => "n"
|
||||
|
||||
# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
|
||||
"\u1E47" => "n"
|
||||
|
||||
# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
|
||||
"\u1E49" => "n"
|
||||
|
||||
# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
|
||||
"\u1E4B" => "n"
|
||||
|
||||
# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
|
||||
"\u207F" => "n"
|
||||
|
||||
# ⓝ [CIRCLED LATIN SMALL LETTER N]
|
||||
"\u24DD" => "n"
|
||||
|
||||
# n [FULLWIDTH LATIN SMALL LETTER N]
|
||||
"\uFF4E" => "n"
|
||||
|
||||
# NJ [LATIN CAPITAL LETTER NJ]
|
||||
"\u01CA" => "NJ"
|
||||
|
||||
# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
|
||||
"\u01CB" => "Nj"
|
||||
|
||||
# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
|
||||
"\u24A9" => "(n)"
|
||||
|
||||
# nj [LATIN SMALL LETTER NJ]
|
||||
"\u01CC" => "nj"
|
||||
|
||||
# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
|
||||
"\u00D2" => "O"
|
||||
|
||||
# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
|
||||
"\u00D3" => "O"
|
||||
|
||||
# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
|
||||
"\u00D4" => "O"
|
||||
|
||||
# Õ [LATIN CAPITAL LETTER O WITH TILDE]
|
||||
"\u00D5" => "O"
|
||||
|
||||
# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
|
||||
"\u00D6" => "O"
|
||||
|
||||
# Ø [LATIN CAPITAL LETTER O WITH STROKE]
|
||||
"\u00D8" => "O"
|
||||
|
||||
# Ō [LATIN CAPITAL LETTER O WITH MACRON]
|
||||
"\u014C" => "O"
|
||||
|
||||
# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
|
||||
"\u014E" => "O"
|
||||
|
||||
# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
|
||||
"\u0150" => "O"
|
||||
|
||||
# Ɔ [LATIN CAPITAL LETTER OPEN O]
|
||||
"\u0186" => "O"
|
||||
|
||||
# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
|
||||
"\u019F" => "O"
|
||||
|
||||
# Ơ [LATIN CAPITAL LETTER O WITH HORN]
|
||||
"\u01A0" => "O"
|
||||
|
||||
# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
|
||||
"\u01D1" => "O"
|
||||
|
||||
# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
|
||||
"\u01EA" => "O"
|
||||
|
||||
# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
|
||||
"\u01EC" => "O"
|
||||
|
||||
# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
|
||||
"\u01FE" => "O"
|
||||
|
||||
# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
|
||||
"\u020C" => "O"
|
||||
|
||||
# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
|
||||
"\u020E" => "O"
|
||||
|
||||
# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
|
||||
"\u022A" => "O"
|
||||
|
||||
# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
|
||||
"\u022C" => "O"
|
||||
|
||||
# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
|
||||
"\u022E" => "O"
|
||||
|
||||
# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
|
||||
"\u0230" => "O"
|
||||
|
||||
# ᴏ [LATIN LETTER SMALL CAPITAL O]
|
||||
"\u1D0F" => "O"
|
||||
|
||||
# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
|
||||
"\u1D10" => "O"
|
||||
|
||||
# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
|
||||
"\u1E4C" => "O"
|
||||
|
||||
# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
|
||||
"\u1E4E" => "O"
|
||||
|
||||
# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
|
||||
"\u1E50" => "O"
|
||||
|
||||
# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
|
||||
"\u1E52" => "O"
|
||||
|
||||
# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
|
||||
"\u1ECC" => "O"
|
||||
|
||||
# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
|
||||
"\u1ECE" => "O"
|
||||
|
||||
# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1ED0" => "O"
|
||||
|
||||
# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1ED2" => "O"
|
||||
|
||||
# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1ED4" => "O"
|
||||
|
||||
# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1ED6" => "O"
|
||||
|
||||
# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1ED8" => "O"
|
||||
|
||||
# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
|
||||
"\u1EDA" => "O"
|
||||
|
||||
# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
|
||||
"\u1EDC" => "O"
|
||||
|
||||
# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
|
||||
"\u1EDE" => "O"
|
||||
|
||||
# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
|
||||
"\u1EE0" => "O"
|
||||
|
||||
# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
|
||||
"\u1EE2" => "O"
|
||||
|
||||
# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
|
||||
"\u24C4" => "O"
|
||||
|
||||
# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
|
||||
"\uA74A" => "O"
|
||||
|
||||
# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
|
||||
"\uA74C" => "O"
|
||||
|
||||
# Ｏ [FULLWIDTH LATIN CAPITAL LETTER O]
|
||||
"\uFF2F" => "O"
|
||||
|
||||
# ò [LATIN SMALL LETTER O WITH GRAVE]
|
||||
"\u00F2" => "o"
|
||||
|
||||
# ó [LATIN SMALL LETTER O WITH ACUTE]
|
||||
"\u00F3" => "o"
|
||||
|
||||
# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
|
||||
"\u00F4" => "o"
|
||||
|
||||
# õ [LATIN SMALL LETTER O WITH TILDE]
|
||||
"\u00F5" => "o"
|
||||
|
||||
# ö [LATIN SMALL LETTER O WITH DIAERESIS]
|
||||
"\u00F6" => "o"
|
||||
|
||||
# ø [LATIN SMALL LETTER O WITH STROKE]
|
||||
"\u00F8" => "o"
|
||||
|
||||
# ō [LATIN SMALL LETTER O WITH MACRON]
|
||||
"\u014D" => "o"
|
||||
|
||||
# ŏ [LATIN SMALL LETTER O WITH BREVE]
|
||||
"\u014F" => "o"
|
||||
|
||||
# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
|
||||
"\u0151" => "o"
|
||||
|
||||
# ơ [LATIN SMALL LETTER O WITH HORN]
|
||||
"\u01A1" => "o"
|
||||
|
||||
# ǒ [LATIN SMALL LETTER O WITH CARON]
|
||||
"\u01D2" => "o"
|
||||
|
||||
# ǫ [LATIN SMALL LETTER O WITH OGONEK]
|
||||
"\u01EB" => "o"
|
||||
|
||||
# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
|
||||
"\u01ED" => "o"
|
||||
|
||||
# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
|
||||
"\u01FF" => "o"
|
||||
|
||||
# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
|
||||
"\u020D" => "o"
|
||||
|
||||
# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
|
||||
"\u020F" => "o"
|
||||
|
||||
# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
|
||||
"\u022B" => "o"
|
||||
|
||||
# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
|
||||
"\u022D" => "o"
|
||||
|
||||
# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
|
||||
"\u022F" => "o"
|
||||
|
||||
# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
|
||||
"\u0231" => "o"
|
||||
|
||||
# ɔ [LATIN SMALL LETTER OPEN O]
|
||||
"\u0254" => "o"
|
||||
|
||||
# ɵ [LATIN SMALL LETTER BARRED O]
|
||||
"\u0275" => "o"
|
||||
|
||||
# ᴖ [LATIN SMALL LETTER TOP HALF O]
|
||||
"\u1D16" => "o"
|
||||
|
||||
# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
|
||||
"\u1D17" => "o"
|
||||
|
||||
# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
|
||||
"\u1D97" => "o"
|
||||
|
||||
# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
|
||||
"\u1E4D" => "o"
|
||||
|
||||
# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
|
||||
"\u1E4F" => "o"
|
||||
|
||||
# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
|
||||
"\u1E51" => "o"
|
||||
|
||||
# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
|
||||
"\u1E53" => "o"
|
||||
|
||||
# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
|
||||
"\u1ECD" => "o"
|
||||
|
||||
# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
|
||||
"\u1ECF" => "o"
|
||||
|
||||
# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
|
||||
"\u1ED1" => "o"
|
||||
|
||||
# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
|
||||
"\u1ED3" => "o"
|
||||
|
||||
# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
|
||||
"\u1ED5" => "o"
|
||||
|
||||
# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
|
||||
"\u1ED7" => "o"
|
||||
|
||||
# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
|
||||
"\u1ED9" => "o"
|
||||
|
||||
# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
|
||||
"\u1EDB" => "o"
|
||||
|
||||
# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
|
||||
"\u1EDD" => "o"
|
||||
|
||||
# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
|
||||
"\u1EDF" => "o"
|
||||
|
||||
# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
|
||||
"\u1EE1" => "o"
|
||||
|
||||
# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
|
||||
"\u1EE3" => "o"
|
||||
|
||||
# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
|
||||
"\u2092" => "o"
|
||||
|
||||
# ⓞ [CIRCLED LATIN SMALL LETTER O]
|
||||
"\u24DE" => "o"
|
||||
|
||||
# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
|
||||
"\u2C7A" => "o"
|
||||
|
||||
# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
|
||||
"\uA74B" => "o"
|
||||
|
||||
# ꝍ [LATIN SMALL LETTER O WITH LOOP]
|
||||
"\uA74D" => "o"
|
||||
|
||||
# ｏ [FULLWIDTH LATIN SMALL LETTER O]
|
||||
"\uFF4F" => "o"
|
||||
|
||||
# Œ [LATIN CAPITAL LIGATURE OE]
|
||||
"\u0152" => "OE"
|
||||
|
||||
# ɶ [LATIN LETTER SMALL CAPITAL OE]
|
||||
"\u0276" => "OE"
|
||||
|
||||
# Ꝏ [LATIN CAPITAL LETTER OO]
|
||||
"\uA74E" => "OO"
|
||||
|
||||
# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
|
||||
"\u0222" => "OU"
|
||||
|
||||
# ᴕ [LATIN LETTER SMALL CAPITAL OU]
|
||||
"\u1D15" => "OU"
|
||||
|
||||
# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
|
||||
"\u24AA" => "(o)"
|
||||
|
||||
# œ [LATIN SMALL LIGATURE OE]
|
||||
"\u0153" => "oe"
|
||||
|
||||
# ᴔ [LATIN SMALL LETTER TURNED OE]
|
||||
"\u1D14" => "oe"
|
||||
|
||||
# ꝏ [LATIN SMALL LETTER OO]
|
||||
"\uA74F" => "oo"
|
||||
|
||||
# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
|
||||
"\u0223" => "ou"
|
||||
|
||||
# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
|
||||
"\u01A4" => "P"
|
||||
|
||||
# ᴘ [LATIN LETTER SMALL CAPITAL P]
|
||||
"\u1D18" => "P"
|
||||
|
||||
# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
|
||||
"\u1E54" => "P"
|
||||
|
||||
# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
|
||||
"\u1E56" => "P"
|
||||
|
||||
# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
|
||||
"\u24C5" => "P"
|
||||
|
||||
# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
|
||||
"\u2C63" => "P"
|
||||
|
||||
# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
|
||||
"\uA750" => "P"
|
||||
|
||||
# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
|
||||
"\uA752" => "P"
|
||||
|
||||
# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
|
||||
"\uA754" => "P"
|
||||
|
||||
# Ｐ [FULLWIDTH LATIN CAPITAL LETTER P]
|
||||
"\uFF30" => "P"
|
||||
|
||||
# ƥ [LATIN SMALL LETTER P WITH HOOK]
|
||||
"\u01A5" => "p"
|
||||
|
||||
# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
|
||||
"\u1D71" => "p"
|
||||
|
||||
# ᵽ [LATIN SMALL LETTER P WITH STROKE]
|
||||
"\u1D7D" => "p"
|
||||
|
||||
# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
|
||||
"\u1D88" => "p"
|
||||
|
||||
# ṕ [LATIN SMALL LETTER P WITH ACUTE]
|
||||
"\u1E55" => "p"
|
||||
|
||||
# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
|
||||
"\u1E57" => "p"
|
||||
|
||||
# ⓟ [CIRCLED LATIN SMALL LETTER P]
|
||||
"\u24DF" => "p"
|
||||
|
||||
# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
|
||||
"\uA751" => "p"
|
||||
|
||||
# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
|
||||
"\uA753" => "p"
|
||||
|
||||
# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
|
||||
"\uA755" => "p"
|
||||
|
||||
# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
|
||||
"\uA7FC" => "p"
|
||||
|
||||
# ｐ [FULLWIDTH LATIN SMALL LETTER P]
|
||||
"\uFF50" => "p"
|
||||
|
||||
# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
|
||||
"\u24AB" => "(p)"
|
||||
|
||||
# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
|
||||
"\u024A" => "Q"
|
||||
|
||||
# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
|
||||
"\u24C6" => "Q"
|
||||
|
||||
# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
|
||||
"\uA756" => "Q"
|
||||
|
||||
# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
|
||||
"\uA758" => "Q"
|
||||
|
||||
# Ｑ [FULLWIDTH LATIN CAPITAL LETTER Q]
|
||||
"\uFF31" => "Q"
|
||||
|
||||
# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
|
||||
"\u0138" => "q"
|
||||
|
||||
# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
|
||||
"\u024B" => "q"
|
||||
|
||||
# ʠ [LATIN SMALL LETTER Q WITH HOOK]
|
||||
"\u02A0" => "q"
|
||||
|
||||
# ⓠ [CIRCLED LATIN SMALL LETTER Q]
|
||||
"\u24E0" => "q"
|
||||
|
||||
# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
|
||||
"\uA757" => "q"
|
||||
|
||||
# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
|
||||
"\uA759" => "q"
|
||||
|
||||
# ｑ [FULLWIDTH LATIN SMALL LETTER Q]
|
||||
"\uFF51" => "q"
|
||||
|
||||
# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
|
||||
"\u24AC" => "(q)"
|
||||
|
||||
# ȹ [LATIN SMALL LETTER QP DIGRAPH]
|
||||
"\u0239" => "qp"
|
||||
|
||||
# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
|
||||
"\u0154" => "R"
|
||||
|
||||
# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
|
||||
"\u0156" => "R"
|
||||
|
||||
# Ř [LATIN CAPITAL LETTER R WITH CARON]
|
||||
"\u0158" => "R"
|
||||
|
||||
# Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
|
||||
"\u0210" => "R"
|
||||
|
||||
# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
|
||||
"\u0212" => "R"
|
||||
|
||||
# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
|
||||
"\u024C" => "R"
|
||||
|
||||
# ʀ [LATIN LETTER SMALL CAPITAL R]
|
||||
"\u0280" => "R"
|
||||
|
||||
# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
|
||||
"\u0281" => "R"
|
||||
|
||||
# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
|
||||
"\u1D19" => "R"
|
||||
|
||||
# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
|
||||
"\u1D1A" => "R"
|
||||
|
||||
# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
|
||||
"\u1E58" => "R"
|
||||
|
||||
# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
|
||||
"\u1E5A" => "R"
|
||||
|
||||
# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
|
||||
"\u1E5C" => "R"
|
||||
|
||||
# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
|
||||
"\u1E5E" => "R"
|
||||
|
||||
# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
|
||||
"\u24C7" => "R"
|
||||
|
||||
# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
|
||||
"\u2C64" => "R"
|
||||
|
||||
# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
|
||||
"\uA75A" => "R"
|
||||
|
||||
# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
|
||||
"\uA782" => "R"
|
||||
|
||||
# Ｒ [FULLWIDTH LATIN CAPITAL LETTER R]
|
||||
"\uFF32" => "R"
|
||||
|
||||
# ŕ [LATIN SMALL LETTER R WITH ACUTE]
|
||||
"\u0155" => "r"
|
||||
|
||||
# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
|
||||
"\u0157" => "r"
|
||||
|
||||
# ř [LATIN SMALL LETTER R WITH CARON]
|
||||
"\u0159" => "r"
|
||||
|
||||
# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
|
||||
"\u0211" => "r"
|
||||
|
||||
# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
|
||||
"\u0213" => "r"
|
||||
|
||||
# ɍ [LATIN SMALL LETTER R WITH STROKE]
|
||||
"\u024D" => "r"
|
||||
|
||||
# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
|
||||
"\u027C" => "r"
|
||||
|
||||
# ɽ [LATIN SMALL LETTER R WITH TAIL]
|
||||
"\u027D" => "r"
|
||||
|
||||
# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
|
||||
"\u027E" => "r"
|
||||
|
||||
# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
|
||||
"\u027F" => "r"
|
||||
|
||||
# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
|
||||
"\u1D63" => "r"
|
||||
|
||||
# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
|
||||
"\u1D72" => "r"
|
||||
|
||||
# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
|
||||
"\u1D73" => "r"
|
||||
|
||||
# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
|
||||
"\u1D89" => "r"
|
||||
|
||||
# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
|
||||
"\u1E59" => "r"
|
||||
|
||||
# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
|
||||
"\u1E5B" => "r"
|
||||
|
||||
# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
|
||||
"\u1E5D" => "r"
|
||||
|
||||
# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
|
||||
"\u1E5F" => "r"
|
||||
|
||||
# ⓡ [CIRCLED LATIN SMALL LETTER R]
|
||||
"\u24E1" => "r"
|
||||
|
||||
# ꝛ [LATIN SMALL LETTER R ROTUNDA]
|
||||
"\uA75B" => "r"
|
||||
|
||||
# ꞃ [LATIN SMALL LETTER INSULAR R]
|
||||
"\uA783" => "r"
|
||||
|
||||
# ｒ [FULLWIDTH LATIN SMALL LETTER R]
|
||||
"\uFF52" => "r"
|
||||
|
||||
# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
|
||||
"\u24AD" => "(r)"
|
||||
|
||||
# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
|
||||
"\u015A" => "S"
|
||||
|
||||
# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
|
||||
"\u015C" => "S"
|
||||
|
||||
# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
|
||||
"\u015E" => "S"
|
||||
|
||||
# Š [LATIN CAPITAL LETTER S WITH CARON]
|
||||
"\u0160" => "S"
|
||||
|
||||
# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
|
||||
"\u0218" => "S"
|
||||
|
||||
# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
|
||||
"\u1E60" => "S"
|
||||
|
||||
# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
|
||||
"\u1E62" => "S"
|
||||
|
||||
# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
|
||||
"\u1E64" => "S"
|
||||
|
||||
# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
|
||||
"\u1E66" => "S"
|
||||
|
||||
# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
|
||||
"\u1E68" => "S"
|
||||
|
||||
# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
|
||||
"\u24C8" => "S"
|
||||
|
||||
# ꜱ [LATIN LETTER SMALL CAPITAL S]
|
||||
"\uA731" => "S"
|
||||
|
||||
# ꞅ [LATIN SMALL LETTER INSULAR S]
|
||||
"\uA785" => "S"
|
||||
|
||||
# Ｓ [FULLWIDTH LATIN CAPITAL LETTER S]
|
||||
"\uFF33" => "S"
|
||||
|
||||
# ś [LATIN SMALL LETTER S WITH ACUTE]
|
||||
"\u015B" => "s"
|
||||
|
||||
# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
|
||||
"\u015D" => "s"
|
||||
|
||||
# ş [LATIN SMALL LETTER S WITH CEDILLA]
|
||||
"\u015F" => "s"
|
||||
|
||||
# š [LATIN SMALL LETTER S WITH CARON]
|
||||
"\u0161" => "s"
|
||||
|
||||
# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
|
||||
"\u017F" => "s"
|
||||
|
||||
# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
|
||||
"\u0219" => "s"
|
||||
|
||||
# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
|
||||
"\u023F" => "s"
|
||||
|
||||
# ʂ [LATIN SMALL LETTER S WITH HOOK]
|
||||
"\u0282" => "s"
|
||||
|
||||
# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
|
||||
"\u1D74" => "s"
|
||||
|
||||
# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
|
||||
"\u1D8A" => "s"
|
||||
|
||||
# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
|
||||
"\u1E61" => "s"
|
||||
|
||||
# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
|
||||
"\u1E63" => "s"
|
||||
|
||||
# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
|
||||
"\u1E65" => "s"
|
||||
|
||||
# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
|
||||
"\u1E67" => "s"
|
||||
|
||||
# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
|
||||
"\u1E69" => "s"
|
||||
|
||||
# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
|
||||
"\u1E9C" => "s"
|
||||
|
||||
# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
|
||||
"\u1E9D" => "s"
|
||||
|
||||
# ⓢ [CIRCLED LATIN SMALL LETTER S]
|
||||
"\u24E2" => "s"
|
||||
|
||||
# Ꞅ [LATIN CAPITAL LETTER INSULAR S]
|
||||
"\uA784" => "s"
|
||||
|
||||
# ｓ [FULLWIDTH LATIN SMALL LETTER S]
|
||||
"\uFF53" => "s"
|
||||
|
||||
# ẞ [LATIN CAPITAL LETTER SHARP S]
|
||||
"\u1E9E" => "SS"
|
||||
|
||||
# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
|
||||
"\u24AE" => "(s)"
|
||||
|
||||
# ß [LATIN SMALL LETTER SHARP S]
|
||||
"\u00DF" => "ss"
|
||||
|
||||
# ﬆ [LATIN SMALL LIGATURE ST]
|
||||
"\uFB06" => "st"
|
||||
|
||||
# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
|
||||
"\u0162" => "T"
|
||||
|
||||
# Ť [LATIN CAPITAL LETTER T WITH CARON]
|
||||
"\u0164" => "T"
|
||||
|
||||
# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
|
||||
"\u0166" => "T"
|
||||
|
||||
# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
|
||||
"\u01AC" => "T"
|
||||
|
||||
# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
|
||||
"\u01AE" => "T"
|
||||
|
||||
# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
|
||||
"\u021A" => "T"
|
||||
|
||||
# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
|
||||
"\u023E" => "T"
|
||||
|
||||
# ᴛ [LATIN LETTER SMALL CAPITAL T]
|
||||
"\u1D1B" => "T"
|
||||
|
||||
# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
|
||||
"\u1E6A" => "T"
|
||||
|
||||
# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
|
||||
"\u1E6C" => "T"
|
||||
|
||||
# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
|
||||
"\u1E6E" => "T"
|
||||
|
||||
# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
|
||||
"\u1E70" => "T"
|
||||
|
||||
# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
|
||||
"\u24C9" => "T"
|
||||
|
||||
# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
|
||||
"\uA786" => "T"
|
||||
|
||||
# Ｔ [FULLWIDTH LATIN CAPITAL LETTER T]
|
||||
"\uFF34" => "T"
|
||||
|
||||
# ţ [LATIN SMALL LETTER T WITH CEDILLA]
|
||||
"\u0163" => "t"
|
||||
|
||||
# ť [LATIN SMALL LETTER T WITH CARON]
|
||||
"\u0165" => "t"
|
||||
|
||||
# ŧ [LATIN SMALL LETTER T WITH STROKE]
|
||||
"\u0167" => "t"
|
||||
|
||||
# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
|
||||
"\u01AB" => "t"
|
||||
|
||||
# ƭ [LATIN SMALL LETTER T WITH HOOK]
|
||||
"\u01AD" => "t"
|
||||
|
||||
# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
|
||||
"\u021B" => "t"
|
||||
|
||||
# ȶ [LATIN SMALL LETTER T WITH CURL]
|
||||
"\u0236" => "t"
|
||||
|
||||
# ʇ [LATIN SMALL LETTER TURNED T]
|
||||
"\u0287" => "t"
|
||||
|
||||
# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
|
||||
"\u0288" => "t"
|
||||
|
||||
# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
|
||||
"\u1D75" => "t"
|
||||
|
||||
# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
|
||||
"\u1E6B" => "t"
|
||||
|
||||
# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
|
||||
"\u1E6D" => "t"
|
||||
|
||||
# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
|
||||
"\u1E6F" => "t"
|
||||
|
||||
# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
|
||||
"\u1E71" => "t"
|
||||
|
||||
# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
|
||||
"\u1E97" => "t"
|
||||
|
||||
# ⓣ [CIRCLED LATIN SMALL LETTER T]
|
||||
"\u24E3" => "t"
|
||||
|
||||
# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
|
||||
"\u2C66" => "t"
|
||||
|
||||
# ｔ [FULLWIDTH LATIN SMALL LETTER T]
|
||||
"\uFF54" => "t"
|
||||
|
||||
# Þ [LATIN CAPITAL LETTER THORN]
|
||||
"\u00DE" => "TH"
|
||||
|
||||
# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
|
||||
"\uA766" => "TH"
|
||||
|
||||
# Ꜩ [LATIN CAPITAL LETTER TZ]
|
||||
"\uA728" => "TZ"
|
||||
|
||||
# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
|
||||
"\u24AF" => "(t)"
|
||||
|
||||
# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
|
||||
"\u02A8" => "tc"
|
||||
|
||||
# þ [LATIN SMALL LETTER THORN]
|
||||
"\u00FE" => "th"
|
||||
|
||||
# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
|
||||
"\u1D7A" => "th"
|
||||
|
||||
# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
|
||||
"\uA767" => "th"
|
||||
|
||||
# ʦ [LATIN SMALL LETTER TS DIGRAPH]
|
||||
"\u02A6" => "ts"
|
||||
|
||||
# ꜩ [LATIN SMALL LETTER TZ]
|
||||
"\uA729" => "tz"
|
||||
|
||||
# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
|
||||
"\u00D9" => "U"
|
||||
|
||||
# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
|
||||
"\u00DA" => "U"
|
||||
|
||||
# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
|
||||
"\u00DB" => "U"
|
||||
|
||||
# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
|
||||
"\u00DC" => "U"
|
||||
|
||||
# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
|
||||
"\u0168" => "U"
|
||||
|
||||
# Ū [LATIN CAPITAL LETTER U WITH MACRON]
|
||||
"\u016A" => "U"
|
||||
|
||||
# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
|
||||
"\u016C" => "U"
|
||||
|
||||
# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
|
||||
"\u016E" => "U"
|
||||
|
||||
# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
|
||||
"\u0170" => "U"
|
||||
|
||||
# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
|
||||
"\u0172" => "U"
|
||||
|
||||
# Ư [LATIN CAPITAL LETTER U WITH HORN]
|
||||
"\u01AF" => "U"
|
||||
|
||||
# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
|
||||
"\u01D3" => "U"
|
||||
|
||||
# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
|
||||
"\u01D5" => "U"
|
||||
|
||||
# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
|
||||
"\u01D7" => "U"
|
||||
|
||||
# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
|
||||
"\u01D9" => "U"
|
||||
|
||||
# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
|
||||
"\u01DB" => "U"
|
||||
|
||||
# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
|
||||
"\u0214" => "U"
|
||||
|
||||
# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
|
||||
"\u0216" => "U"
|
||||
|
||||
# Ʉ [LATIN CAPITAL LETTER U BAR]
|
||||
"\u0244" => "U"
|
||||
|
||||
# ᴜ [LATIN LETTER SMALL CAPITAL U]
|
||||
"\u1D1C" => "U"
|
||||
|
||||
# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
|
||||
"\u1D7E" => "U"
|
||||
|
||||
# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
|
||||
"\u1E72" => "U"
|
||||
|
||||
# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
|
||||
"\u1E74" => "U"
|
||||
|
||||
# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
|
||||
"\u1E76" => "U"
|
||||
|
||||
# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
|
||||
"\u1E78" => "U"
|
||||
|
||||
# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
|
||||
"\u1E7A" => "U"
|
||||
|
||||
# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
|
||||
"\u1EE4" => "U"
|
||||
|
||||
# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
|
||||
"\u1EE6" => "U"
|
||||
|
||||
# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
|
||||
"\u1EE8" => "U"
|
||||
|
||||
# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
|
||||
"\u1EEA" => "U"
|
||||
|
||||
# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
|
||||
"\u1EEC" => "U"
|
||||
|
||||
# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
|
||||
"\u1EEE" => "U"
|
||||
|
||||
# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
|
||||
"\u1EF0" => "U"
|
||||
|
||||
# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
|
||||
"\u24CA" => "U"
|
||||
|
||||
# Ｕ [FULLWIDTH LATIN CAPITAL LETTER U]
|
||||
"\uFF35" => "U"
|
||||
|
||||
# ù [LATIN SMALL LETTER U WITH GRAVE]
|
||||
"\u00F9" => "u"
|
||||
|
||||
# ú [LATIN SMALL LETTER U WITH ACUTE]
|
||||
"\u00FA" => "u"
|
||||
|
||||
# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
|
||||
"\u00FB" => "u"
|
||||
|
||||
# ü [LATIN SMALL LETTER U WITH DIAERESIS]
|
||||
"\u00FC" => "u"
|
||||
|
||||
# ũ [LATIN SMALL LETTER U WITH TILDE]
|
||||
"\u0169" => "u"
|
||||
|
||||
# ū [LATIN SMALL LETTER U WITH MACRON]
|
||||
"\u016B" => "u"
|
||||
|
||||
# ŭ [LATIN SMALL LETTER U WITH BREVE]
|
||||
"\u016D" => "u"
|
||||
|
||||
# ů [LATIN SMALL LETTER U WITH RING ABOVE]
|
||||
"\u016F" => "u"
|
||||
|
||||
# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
|
||||
"\u0171" => "u"
|
||||
|
||||
# ų [LATIN SMALL LETTER U WITH OGONEK]
|
||||
"\u0173" => "u"
|
||||
|
||||
# ư [LATIN SMALL LETTER U WITH HORN]
|
||||
"\u01B0" => "u"
|
||||
|
||||
# ǔ [LATIN SMALL LETTER U WITH CARON]
|
||||
"\u01D4" => "u"
|
||||
|
||||
# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
|
||||
"\u01D6" => "u"
|
||||
|
||||
# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
|
||||
"\u01D8" => "u"
|
||||
|
||||
# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
|
||||
"\u01DA" => "u"
|
||||
|
||||
# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
|
||||
"\u01DC" => "u"
|
||||
|
||||
# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
|
||||
"\u0215" => "u"
|
||||
|
||||
# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
|
||||
"\u0217" => "u"
|
||||
|
||||
# ʉ [LATIN SMALL LETTER U BAR]
|
||||
"\u0289" => "u"
|
||||
|
||||
# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
|
||||
"\u1D64" => "u"
|
||||
|
||||
# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
|
||||
"\u1D99" => "u"
|
||||
|
||||
# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
|
||||
"\u1E73" => "u"
|
||||
|
||||
# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
|
||||
"\u1E75" => "u"
|
||||
|
||||
# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
|
||||
"\u1E77" => "u"
|
||||
|
||||
# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
|
||||
"\u1E79" => "u"
|
||||
|
||||
# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
|
||||
"\u1E7B" => "u"
|
||||
|
||||
# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
|
||||
"\u1EE5" => "u"
|
||||
|
||||
# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
|
||||
"\u1EE7" => "u"
|
||||
|
||||
# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
|
||||
"\u1EE9" => "u"
|
||||
|
||||
# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
|
||||
"\u1EEB" => "u"
|
||||
|
||||
# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
|
||||
"\u1EED" => "u"
|
||||
|
||||
# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
|
||||
"\u1EEF" => "u"
|
||||
|
||||
# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
|
||||
"\u1EF1" => "u"
|
||||
|
||||
# ⓤ [CIRCLED LATIN SMALL LETTER U]
|
||||
"\u24E4" => "u"
|
||||
|
||||
# ｕ [FULLWIDTH LATIN SMALL LETTER U]
|
||||
"\uFF55" => "u"
|
||||
|
||||
# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
|
||||
"\u24B0" => "(u)"
|
||||
|
||||
# ᵫ [LATIN SMALL LETTER UE]
|
||||
"\u1D6B" => "ue"
|
||||
|
||||
# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
|
||||
"\u01B2" => "V"
|
||||
|
||||
# Ʌ [LATIN CAPITAL LETTER TURNED V]
|
||||
"\u0245" => "V"
|
||||
|
||||
# ᴠ [LATIN LETTER SMALL CAPITAL V]
|
||||
"\u1D20" => "V"
|
||||
|
||||
# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
|
||||
"\u1E7C" => "V"
|
||||
|
||||
# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
|
||||
"\u1E7E" => "V"
|
||||
|
||||
# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
|
||||
"\u1EFC" => "V"
|
||||
|
||||
# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
|
||||
"\u24CB" => "V"
|
||||
|
||||
# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
|
||||
"\uA75E" => "V"
|
||||
|
||||
# Ꝩ [LATIN CAPITAL LETTER VEND]
|
||||
"\uA768" => "V"
|
||||
|
||||
# Ｖ [FULLWIDTH LATIN CAPITAL LETTER V]
|
||||
"\uFF36" => "V"
|
||||
|
||||
# ʋ [LATIN SMALL LETTER V WITH HOOK]
|
||||
"\u028B" => "v"
|
||||
|
||||
# ʌ [LATIN SMALL LETTER TURNED V]
|
||||
"\u028C" => "v"
|
||||
|
||||
# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
|
||||
"\u1D65" => "v"
|
||||
|
||||
# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
|
||||
"\u1D8C" => "v"
|
||||
|
||||
# ṽ [LATIN SMALL LETTER V WITH TILDE]
|
||||
"\u1E7D" => "v"
|
||||
|
||||
# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
|
||||
"\u1E7F" => "v"
|
||||
|
||||
# ⓥ [CIRCLED LATIN SMALL LETTER V]
|
||||
"\u24E5" => "v"
|
||||
|
||||
# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
|
||||
"\u2C71" => "v"
|
||||
|
||||
# ⱴ [LATIN SMALL LETTER V WITH CURL]
|
||||
"\u2C74" => "v"
|
||||
|
||||
# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
|
||||
"\uA75F" => "v"
|
||||
|
||||
# ｖ [FULLWIDTH LATIN SMALL LETTER V]
|
||||
"\uFF56" => "v"
|
||||
|
||||
# Ꝡ [LATIN CAPITAL LETTER VY]
|
||||
"\uA760" => "VY"
|
||||
|
||||
# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
|
||||
"\u24B1" => "(v)"
|
||||
|
||||
# ꝡ [LATIN SMALL LETTER VY]
|
||||
"\uA761" => "vy"
|
||||
|
||||
# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
|
||||
"\u0174" => "W"
|
||||
|
||||
# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
|
||||
"\u01F7" => "W"
|
||||
|
||||
# ᴡ [LATIN LETTER SMALL CAPITAL W]
|
||||
"\u1D21" => "W"
|
||||
|
||||
# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
|
||||
"\u1E80" => "W"
|
||||
|
||||
# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
|
||||
"\u1E82" => "W"
|
||||
|
||||
# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
|
||||
"\u1E84" => "W"
|
||||
|
||||
# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
|
||||
"\u1E86" => "W"
|
||||
|
||||
# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
|
||||
"\u1E88" => "W"
|
||||
|
||||
# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
|
||||
"\u24CC" => "W"
|
||||
|
||||
# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
|
||||
"\u2C72" => "W"
|
||||
|
||||
# Ｗ [FULLWIDTH LATIN CAPITAL LETTER W]
|
||||
"\uFF37" => "W"
|
||||
|
||||
# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
|
||||
"\u0175" => "w"
|
||||
|
||||
# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
|
||||
"\u01BF" => "w"
|
||||
|
||||
# ʍ [LATIN SMALL LETTER TURNED W]
|
||||
"\u028D" => "w"
|
||||
|
||||
# ẁ [LATIN SMALL LETTER W WITH GRAVE]
|
||||
"\u1E81" => "w"
|
||||
|
||||
# ẃ [LATIN SMALL LETTER W WITH ACUTE]
|
||||
"\u1E83" => "w"
|
||||
|
||||
# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
|
||||
"\u1E85" => "w"
|
||||
|
||||
# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
|
||||
"\u1E87" => "w"
|
||||
|
||||
# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
|
||||
"\u1E89" => "w"
|
||||
|
||||
# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
|
||||
"\u1E98" => "w"
|
||||
|
||||
# ⓦ [CIRCLED LATIN SMALL LETTER W]
|
||||
"\u24E6" => "w"
|
||||
|
||||
# ⱳ [LATIN SMALL LETTER W WITH HOOK]
|
||||
"\u2C73" => "w"
|
||||
|
||||
# ｗ [FULLWIDTH LATIN SMALL LETTER W]
|
||||
"\uFF57" => "w"
|
||||
|
||||
# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
|
||||
"\u24B2" => "(w)"
|
||||
|
||||
# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
|
||||
"\u1E8A" => "X"
|
||||
|
||||
# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
|
||||
"\u1E8C" => "X"
|
||||
|
||||
# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
|
||||
"\u24CD" => "X"
|
||||
|
||||
# Ｘ [FULLWIDTH LATIN CAPITAL LETTER X]
|
||||
"\uFF38" => "X"
|
||||
|
||||
# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
|
||||
"\u1D8D" => "x"
|
||||
|
||||
# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
|
||||
"\u1E8B" => "x"
|
||||
|
||||
# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
|
||||
"\u1E8D" => "x"
|
||||
|
||||
# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
|
||||
"\u2093" => "x"
|
||||
|
||||
# ⓧ [CIRCLED LATIN SMALL LETTER X]
|
||||
"\u24E7" => "x"
|
||||
|
||||
# ｘ [FULLWIDTH LATIN SMALL LETTER X]
|
||||
"\uFF58" => "x"
|
||||
|
||||
# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
|
||||
"\u24B3" => "(x)"
|
||||
|
||||
# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
|
||||
"\u00DD" => "Y"
|
||||
|
||||
# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
|
||||
"\u0176" => "Y"
|
||||
|
||||
# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
|
||||
"\u0178" => "Y"
|
||||
|
||||
# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
|
||||
"\u01B3" => "Y"
|
||||
|
||||
# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
|
||||
"\u0232" => "Y"
|
||||
|
||||
# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
|
||||
"\u024E" => "Y"
|
||||
|
||||
# ʏ [LATIN LETTER SMALL CAPITAL Y]
|
||||
"\u028F" => "Y"
|
||||
|
||||
# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
|
||||
"\u1E8E" => "Y"
|
||||
|
||||
# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
|
||||
"\u1EF2" => "Y"
|
||||
|
||||
# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
|
||||
"\u1EF4" => "Y"
|
||||
|
||||
# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
|
||||
"\u1EF6" => "Y"
|
||||
|
||||
# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
|
||||
"\u1EF8" => "Y"
|
||||
|
||||
# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
|
||||
"\u1EFE" => "Y"
|
||||
|
||||
# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
|
||||
"\u24CE" => "Y"
|
||||
|
||||
# Ｙ [FULLWIDTH LATIN CAPITAL LETTER Y]
|
||||
"\uFF39" => "Y"
|
||||
|
||||
# ý [LATIN SMALL LETTER Y WITH ACUTE]
|
||||
"\u00FD" => "y"
|
||||
|
||||
# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
|
||||
"\u00FF" => "y"
|
||||
|
||||
# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
|
||||
"\u0177" => "y"
|
||||
|
||||
# ƴ [LATIN SMALL LETTER Y WITH HOOK]
|
||||
"\u01B4" => "y"
|
||||
|
||||
# ȳ [LATIN SMALL LETTER Y WITH MACRON]
|
||||
"\u0233" => "y"
|
||||
|
||||
# ɏ [LATIN SMALL LETTER Y WITH STROKE]
|
||||
"\u024F" => "y"
|
||||
|
||||
# ʎ [LATIN SMALL LETTER TURNED Y]
|
||||
"\u028E" => "y"
|
||||
|
||||
# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
|
||||
"\u1E8F" => "y"
|
||||
|
||||
# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
|
||||
"\u1E99" => "y"
|
||||
|
||||
# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
|
||||
"\u1EF3" => "y"
|
||||
|
||||
# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
|
||||
"\u1EF5" => "y"
|
||||
|
||||
# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
|
||||
"\u1EF7" => "y"
|
||||
|
||||
# ỹ [LATIN SMALL LETTER Y WITH TILDE]
|
||||
"\u1EF9" => "y"
|
||||
|
||||
# ỿ [LATIN SMALL LETTER Y WITH LOOP]
|
||||
"\u1EFF" => "y"
|
||||
|
||||
# ⓨ [CIRCLED LATIN SMALL LETTER Y]
|
||||
"\u24E8" => "y"
|
||||
|
||||
# ｙ [FULLWIDTH LATIN SMALL LETTER Y]
|
||||
"\uFF59" => "y"
|
||||
|
||||
# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
|
||||
"\u24B4" => "(y)"
|
||||
|
||||
# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
|
||||
"\u0179" => "Z"
|
||||
|
||||
# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
|
||||
"\u017B" => "Z"
|
||||
|
||||
# Ž [LATIN CAPITAL LETTER Z WITH CARON]
|
||||
"\u017D" => "Z"
|
||||
|
||||
# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
|
||||
"\u01B5" => "Z"
|
||||
|
||||
# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
|
||||
"\u021C" => "Z"
|
||||
|
||||
# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
|
||||
"\u0224" => "Z"
|
||||
|
||||
# ᴢ [LATIN LETTER SMALL CAPITAL Z]
|
||||
"\u1D22" => "Z"
|
||||
|
||||
# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
|
||||
"\u1E90" => "Z"
|
||||
|
||||
# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
|
||||
"\u1E92" => "Z"
|
||||
|
||||
# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
|
||||
"\u1E94" => "Z"
|
||||
|
||||
# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
|
||||
"\u24CF" => "Z"
|
||||
|
||||
# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
|
||||
"\u2C6B" => "Z"
|
||||
|
||||
# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
|
||||
"\uA762" => "Z"
|
||||
|
||||
# Ｚ [FULLWIDTH LATIN CAPITAL LETTER Z]
|
||||
"\uFF3A" => "Z"
|
||||
|
||||
# ź [LATIN SMALL LETTER Z WITH ACUTE]
|
||||
"\u017A" => "z"
|
||||
|
||||
# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
|
||||
"\u017C" => "z"
|
||||
|
||||
# ž [LATIN SMALL LETTER Z WITH CARON]
|
||||
"\u017E" => "z"
|
||||
|
||||
# ƶ [LATIN SMALL LETTER Z WITH STROKE]
|
||||
"\u01B6" => "z"
|
||||
|
||||
# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
|
||||
"\u021D" => "z"
|
||||
|
||||
# ȥ [LATIN SMALL LETTER Z WITH HOOK]
|
||||
"\u0225" => "z"
|
||||
|
||||
# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
|
||||
"\u0240" => "z"
|
||||
|
||||
# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
|
||||
"\u0290" => "z"
|
||||
|
||||
# ʑ [LATIN SMALL LETTER Z WITH CURL]
|
||||
"\u0291" => "z"
|
||||
|
||||
# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
|
||||
"\u1D76" => "z"
|
||||
|
||||
# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
|
||||
"\u1D8E" => "z"
|
||||
|
||||
# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
|
||||
"\u1E91" => "z"
|
||||
|
||||
# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
|
||||
"\u1E93" => "z"
|
||||
|
||||
# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
|
||||
"\u1E95" => "z"
|
||||
|
||||
# ⓩ [CIRCLED LATIN SMALL LETTER Z]
|
||||
"\u24E9" => "z"
|
||||
|
||||
# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
|
||||
"\u2C6C" => "z"
|
||||
|
||||
# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
|
||||
"\uA763" => "z"
|
||||
|
||||
# z [FULLWIDTH LATIN SMALL LETTER Z]
|
||||
"\uFF5A" => "z"
|
||||
|
||||
# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
|
||||
"\u24B5" => "(z)"
|
||||
|
||||
# ⁰ [SUPERSCRIPT ZERO]
|
||||
"\u2070" => "0"
|
||||
|
||||
# ₀ [SUBSCRIPT ZERO]
|
||||
"\u2080" => "0"
|
||||
|
||||
# ⓪ [CIRCLED DIGIT ZERO]
|
||||
"\u24EA" => "0"
|
||||
|
||||
# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
|
||||
"\u24FF" => "0"
|
||||
|
||||
# 0 [FULLWIDTH DIGIT ZERO]
|
||||
"\uFF10" => "0"
|
||||
|
||||
# ¹ [SUPERSCRIPT ONE]
|
||||
"\u00B9" => "1"
|
||||
|
||||
# ₁ [SUBSCRIPT ONE]
|
||||
"\u2081" => "1"
|
||||
|
||||
# ① [CIRCLED DIGIT ONE]
|
||||
"\u2460" => "1"
|
||||
|
||||
# ⓵ [DOUBLE CIRCLED DIGIT ONE]
|
||||
"\u24F5" => "1"
|
||||
|
||||
# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
|
||||
"\u2776" => "1"
|
||||
|
||||
# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
|
||||
"\u2780" => "1"
|
||||
|
||||
# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
|
||||
"\u278A" => "1"
|
||||
|
||||
# 1 [FULLWIDTH DIGIT ONE]
|
||||
"\uFF11" => "1"
|
||||
|
||||
# ⒈ [DIGIT ONE FULL STOP]
|
||||
"\u2488" => "1."
|
||||
|
||||
# ⑴ [PARENTHESIZED DIGIT ONE]
|
||||
"\u2474" => "(1)"
|
||||
|
||||
# ² [SUPERSCRIPT TWO]
|
||||
"\u00B2" => "2"
|
||||
|
||||
# ₂ [SUBSCRIPT TWO]
|
||||
"\u2082" => "2"
|
||||
|
||||
# ② [CIRCLED DIGIT TWO]
|
||||
"\u2461" => "2"
|
||||
|
||||
# ⓶ [DOUBLE CIRCLED DIGIT TWO]
|
||||
"\u24F6" => "2"
|
||||
|
||||
# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
|
||||
"\u2777" => "2"
|
||||
|
||||
# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
|
||||
"\u2781" => "2"
|
||||
|
||||
# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
|
||||
"\u278B" => "2"
|
||||
|
||||
# 2 [FULLWIDTH DIGIT TWO]
|
||||
"\uFF12" => "2"
|
||||
|
||||
# ⒉ [DIGIT TWO FULL STOP]
|
||||
"\u2489" => "2."
|
||||
|
||||
# ⑵ [PARENTHESIZED DIGIT TWO]
|
||||
"\u2475" => "(2)"
|
||||
|
||||
# ³ [SUPERSCRIPT THREE]
|
||||
"\u00B3" => "3"
|
||||
|
||||
# ₃ [SUBSCRIPT THREE]
|
||||
"\u2083" => "3"
|
||||
|
||||
# ③ [CIRCLED DIGIT THREE]
|
||||
"\u2462" => "3"
|
||||
|
||||
# ⓷ [DOUBLE CIRCLED DIGIT THREE]
|
||||
"\u24F7" => "3"
|
||||
|
||||
# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
|
||||
"\u2778" => "3"
|
||||
|
||||
# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
|
||||
"\u2782" => "3"
|
||||
|
||||
# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
|
||||
"\u278C" => "3"
|
||||
|
||||
# 3 [FULLWIDTH DIGIT THREE]
|
||||
"\uFF13" => "3"
|
||||
|
||||
# ⒊ [DIGIT THREE FULL STOP]
|
||||
"\u248A" => "3."
|
||||
|
||||
# ⑶ [PARENTHESIZED DIGIT THREE]
|
||||
"\u2476" => "(3)"
|
||||
|
||||
# ⁴ [SUPERSCRIPT FOUR]
|
||||
"\u2074" => "4"
|
||||
|
||||
# ₄ [SUBSCRIPT FOUR]
|
||||
"\u2084" => "4"
|
||||
|
||||
# ④ [CIRCLED DIGIT FOUR]
|
||||
"\u2463" => "4"
|
||||
|
||||
# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
|
||||
"\u24F8" => "4"
|
||||
|
||||
# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
|
||||
"\u2779" => "4"
|
||||
|
||||
# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
|
||||
"\u2783" => "4"
|
||||
|
||||
# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
|
||||
"\u278D" => "4"
|
||||
|
||||
# 4 [FULLWIDTH DIGIT FOUR]
|
||||
"\uFF14" => "4"
|
||||
|
||||
# ⒋ [DIGIT FOUR FULL STOP]
|
||||
"\u248B" => "4."
|
||||
|
||||
# ⑷ [PARENTHESIZED DIGIT FOUR]
|
||||
"\u2477" => "(4)"
|
||||
|
||||
# ⁵ [SUPERSCRIPT FIVE]
|
||||
"\u2075" => "5"
|
||||
|
||||
# ₅ [SUBSCRIPT FIVE]
|
||||
"\u2085" => "5"
|
||||
|
||||
# ⑤ [CIRCLED DIGIT FIVE]
|
||||
"\u2464" => "5"
|
||||
|
||||
# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
|
||||
"\u24F9" => "5"
|
||||
|
||||
# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
|
||||
"\u277A" => "5"
|
||||
|
||||
# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
|
||||
"\u2784" => "5"
|
||||
|
||||
# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
|
||||
"\u278E" => "5"
|
||||
|
||||
# 5 [FULLWIDTH DIGIT FIVE]
|
||||
"\uFF15" => "5"
|
||||
|
||||
# ⒌ [DIGIT FIVE FULL STOP]
|
||||
"\u248C" => "5."
|
||||
|
||||
# ⑸ [PARENTHESIZED DIGIT FIVE]
|
||||
"\u2478" => "(5)"
|
||||
|
||||
# ⁶ [SUPERSCRIPT SIX]
|
||||
"\u2076" => "6"
|
||||
|
||||
# ₆ [SUBSCRIPT SIX]
|
||||
"\u2086" => "6"
|
||||
|
||||
# ⑥ [CIRCLED DIGIT SIX]
|
||||
"\u2465" => "6"
|
||||
|
||||
# ⓺ [DOUBLE CIRCLED DIGIT SIX]
|
||||
"\u24FA" => "6"
|
||||
|
||||
# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
|
||||
"\u277B" => "6"
|
||||
|
||||
# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
|
||||
"\u2785" => "6"
|
||||
|
||||
# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
|
||||
"\u278F" => "6"
|
||||
|
||||
# 6 [FULLWIDTH DIGIT SIX]
|
||||
"\uFF16" => "6"
|
||||
|
||||
# ⒍ [DIGIT SIX FULL STOP]
|
||||
"\u248D" => "6."
|
||||
|
||||
# ⑹ [PARENTHESIZED DIGIT SIX]
|
||||
"\u2479" => "(6)"
|
||||
|
||||
# ⁷ [SUPERSCRIPT SEVEN]
|
||||
"\u2077" => "7"
|
||||
|
||||
# ₇ [SUBSCRIPT SEVEN]
|
||||
"\u2087" => "7"
|
||||
|
||||
# ⑦ [CIRCLED DIGIT SEVEN]
|
||||
"\u2466" => "7"
|
||||
|
||||
# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
|
||||
"\u24FB" => "7"
|
||||
|
||||
# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
|
||||
"\u277C" => "7"
|
||||
|
||||
# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
|
||||
"\u2786" => "7"
|
||||
|
||||
# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
|
||||
"\u2790" => "7"
|
||||
|
||||
# 7 [FULLWIDTH DIGIT SEVEN]
|
||||
"\uFF17" => "7"
|
||||
|
||||
# ⒎ [DIGIT SEVEN FULL STOP]
|
||||
"\u248E" => "7."
|
||||
|
||||
# ⑺ [PARENTHESIZED DIGIT SEVEN]
|
||||
"\u247A" => "(7)"
|
||||
|
||||
# ⁸ [SUPERSCRIPT EIGHT]
|
||||
"\u2078" => "8"
|
||||
|
||||
# ₈ [SUBSCRIPT EIGHT]
|
||||
"\u2088" => "8"
|
||||
|
||||
# ⑧ [CIRCLED DIGIT EIGHT]
|
||||
"\u2467" => "8"
|
||||
|
||||
# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
|
||||
"\u24FC" => "8"
|
||||
|
||||
# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
|
||||
"\u277D" => "8"
|
||||
|
||||
# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
|
||||
"\u2787" => "8"
|
||||
|
||||
# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
|
||||
"\u2791" => "8"
|
||||
|
||||
# 8 [FULLWIDTH DIGIT EIGHT]
|
||||
"\uFF18" => "8"
|
||||
|
||||
# ⒏ [DIGIT EIGHT FULL STOP]
|
||||
"\u248F" => "8."
|
||||
|
||||
# ⑻ [PARENTHESIZED DIGIT EIGHT]
|
||||
"\u247B" => "(8)"
|
||||
|
||||
# ⁹ [SUPERSCRIPT NINE]
|
||||
"\u2079" => "9"
|
||||
|
||||
# ₉ [SUBSCRIPT NINE]
|
||||
"\u2089" => "9"
|
||||
|
||||
# ⑨ [CIRCLED DIGIT NINE]
|
||||
"\u2468" => "9"
|
||||
|
||||
# ⓽ [DOUBLE CIRCLED DIGIT NINE]
|
||||
"\u24FD" => "9"
|
||||
|
||||
# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
|
||||
"\u277E" => "9"
|
||||
|
||||
# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
|
||||
"\u2788" => "9"
|
||||
|
||||
# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
|
||||
"\u2792" => "9"
|
||||
|
||||
# 9 [FULLWIDTH DIGIT NINE]
|
||||
"\uFF19" => "9"
|
||||
|
||||
# ⒐ [DIGIT NINE FULL STOP]
|
||||
"\u2490" => "9."
|
||||
|
||||
# ⑼ [PARENTHESIZED DIGIT NINE]
|
||||
"\u247C" => "(9)"
|
||||
|
||||
# ⑩ [CIRCLED NUMBER TEN]
|
||||
"\u2469" => "10"
|
||||
|
||||
# ⓾ [DOUBLE CIRCLED NUMBER TEN]
|
||||
"\u24FE" => "10"
|
||||
|
||||
# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
|
||||
"\u277F" => "10"
|
||||
|
||||
# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
|
||||
"\u2789" => "10"
|
||||
|
||||
# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
|
||||
"\u2793" => "10"
|
||||
|
||||
# ⒑ [NUMBER TEN FULL STOP]
|
||||
"\u2491" => "10."
|
||||
|
||||
# ⑽ [PARENTHESIZED NUMBER TEN]
|
||||
"\u247D" => "(10)"
|
||||
|
||||
# ⑪ [CIRCLED NUMBER ELEVEN]
|
||||
"\u246A" => "11"
|
||||
|
||||
# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
|
||||
"\u24EB" => "11"
|
||||
|
||||
# ⒒ [NUMBER ELEVEN FULL STOP]
|
||||
"\u2492" => "11."
|
||||
|
||||
# ⑾ [PARENTHESIZED NUMBER ELEVEN]
|
||||
"\u247E" => "(11)"
|
||||
|
||||
# ⑫ [CIRCLED NUMBER TWELVE]
|
||||
"\u246B" => "12"
|
||||
|
||||
# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
|
||||
"\u24EC" => "12"
|
||||
|
||||
# ⒓ [NUMBER TWELVE FULL STOP]
|
||||
"\u2493" => "12."
|
||||
|
||||
# ⑿ [PARENTHESIZED NUMBER TWELVE]
|
||||
"\u247F" => "(12)"
|
||||
|
||||
# ⑬ [CIRCLED NUMBER THIRTEEN]
|
||||
"\u246C" => "13"
|
||||
|
||||
# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
|
||||
"\u24ED" => "13"
|
||||
|
||||
# ⒔ [NUMBER THIRTEEN FULL STOP]
|
||||
"\u2494" => "13."
|
||||
|
||||
# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
|
||||
"\u2480" => "(13)"
|
||||
|
||||
# ⑭ [CIRCLED NUMBER FOURTEEN]
|
||||
"\u246D" => "14"
|
||||
|
||||
# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
|
||||
"\u24EE" => "14"
|
||||
|
||||
# ⒕ [NUMBER FOURTEEN FULL STOP]
|
||||
"\u2495" => "14."
|
||||
|
||||
# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
|
||||
"\u2481" => "(14)"
|
||||
|
||||
# ⑮ [CIRCLED NUMBER FIFTEEN]
|
||||
"\u246E" => "15"
|
||||
|
||||
# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
|
||||
"\u24EF" => "15"
|
||||
|
||||
# ⒖ [NUMBER FIFTEEN FULL STOP]
|
||||
"\u2496" => "15."
|
||||
|
||||
# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
|
||||
"\u2482" => "(15)"
|
||||
|
||||
# ⑯ [CIRCLED NUMBER SIXTEEN]
|
||||
"\u246F" => "16"
|
||||
|
||||
# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
|
||||
"\u24F0" => "16"
|
||||
|
||||
# ⒗ [NUMBER SIXTEEN FULL STOP]
|
||||
"\u2497" => "16."
|
||||
|
||||
# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
|
||||
"\u2483" => "(16)"
|
||||
|
||||
# ⑰ [CIRCLED NUMBER SEVENTEEN]
|
||||
"\u2470" => "17"
|
||||
|
||||
# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
|
||||
"\u24F1" => "17"
|
||||
|
||||
# ⒘ [NUMBER SEVENTEEN FULL STOP]
|
||||
"\u2498" => "17."
|
||||
|
||||
# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
|
||||
"\u2484" => "(17)"
|
||||
|
||||
# ⑱ [CIRCLED NUMBER EIGHTEEN]
|
||||
"\u2471" => "18"
|
||||
|
||||
# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
|
||||
"\u24F2" => "18"
|
||||
|
||||
# ⒙ [NUMBER EIGHTEEN FULL STOP]
|
||||
"\u2499" => "18."
|
||||
|
||||
# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
|
||||
"\u2485" => "(18)"
|
||||
|
||||
# ⑲ [CIRCLED NUMBER NINETEEN]
|
||||
"\u2472" => "19"
|
||||
|
||||
# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
|
||||
"\u24F3" => "19"
|
||||
|
||||
# ⒚ [NUMBER NINETEEN FULL STOP]
|
||||
"\u249A" => "19."
|
||||
|
||||
# ⒆ [PARENTHESIZED NUMBER NINETEEN]
|
||||
"\u2486" => "(19)"
|
||||
|
||||
# ⑳ [CIRCLED NUMBER TWENTY]
|
||||
"\u2473" => "20"
|
||||
|
||||
# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
|
||||
"\u24F4" => "20"
|
||||
|
||||
# ⒛ [NUMBER TWENTY FULL STOP]
|
||||
"\u249B" => "20."
|
||||
|
||||
# ⒇ [PARENTHESIZED NUMBER TWENTY]
|
||||
"\u2487" => "(20)"
|
||||
|
||||
# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
|
||||
"\u00AB" => "\""
|
||||
|
||||
# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
|
||||
"\u00BB" => "\""
|
||||
|
||||
# “ [LEFT DOUBLE QUOTATION MARK]
|
||||
"\u201C" => "\""
|
||||
|
||||
# ” [RIGHT DOUBLE QUOTATION MARK]
|
||||
"\u201D" => "\""
|
||||
|
||||
# „ [DOUBLE LOW-9 QUOTATION MARK]
|
||||
"\u201E" => "\""
|
||||
|
||||
# ″ [DOUBLE PRIME]
|
||||
"\u2033" => "\""
|
||||
|
||||
# ‶ [REVERSED DOUBLE PRIME]
|
||||
"\u2036" => "\""
|
||||
|
||||
# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
|
||||
"\u275D" => "\""
|
||||
|
||||
# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
|
||||
"\u275E" => "\""
|
||||
|
||||
# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
|
||||
"\u276E" => "\""
|
||||
|
||||
# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
|
||||
"\u276F" => "\""
|
||||
|
||||
# " [FULLWIDTH QUOTATION MARK]
|
||||
"\uFF02" => "\""
|
||||
|
||||
# ‘ [LEFT SINGLE QUOTATION MARK]
|
||||
"\u2018" => "\'"
|
||||
|
||||
# ’ [RIGHT SINGLE QUOTATION MARK]
|
||||
"\u2019" => "\'"
|
||||
|
||||
# ‚ [SINGLE LOW-9 QUOTATION MARK]
|
||||
"\u201A" => "\'"
|
||||
|
||||
# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
|
||||
"\u201B" => "\'"
|
||||
|
||||
# ′ [PRIME]
|
||||
"\u2032" => "\'"
|
||||
|
||||
# ‵ [REVERSED PRIME]
|
||||
"\u2035" => "\'"
|
||||
|
||||
# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
|
||||
"\u2039" => "\'"
|
||||
|
||||
# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
|
||||
"\u203A" => "\'"
|
||||
|
||||
# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
|
||||
"\u275B" => "\'"
|
||||
|
||||
# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
|
||||
"\u275C" => "\'"
|
||||
|
||||
# ' [FULLWIDTH APOSTROPHE]
|
||||
"\uFF07" => "\'"
|
||||
|
||||
# ‐ [HYPHEN]
|
||||
"\u2010" => "-"
|
||||
|
||||
# ‑ [NON-BREAKING HYPHEN]
|
||||
"\u2011" => "-"
|
||||
|
||||
# ‒ [FIGURE DASH]
|
||||
"\u2012" => "-"
|
||||
|
||||
# – [EN DASH]
|
||||
"\u2013" => "-"
|
||||
|
||||
# — [EM DASH]
|
||||
"\u2014" => "-"
|
||||
|
||||
# ⁻ [SUPERSCRIPT MINUS]
|
||||
"\u207B" => "-"
|
||||
|
||||
# ₋ [SUBSCRIPT MINUS]
|
||||
"\u208B" => "-"
|
||||
|
||||
# - [FULLWIDTH HYPHEN-MINUS]
|
||||
"\uFF0D" => "-"
|
||||
|
||||
# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
|
||||
"\u2045" => "["
|
||||
|
||||
# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
|
||||
"\u2772" => "["
|
||||
|
||||
# [ [FULLWIDTH LEFT SQUARE BRACKET]
|
||||
"\uFF3B" => "["
|
||||
|
||||
# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
|
||||
"\u2046" => "]"
|
||||
|
||||
# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
|
||||
"\u2773" => "]"
|
||||
|
||||
# ] [FULLWIDTH RIGHT SQUARE BRACKET]
|
||||
"\uFF3D" => "]"
|
||||
|
||||
# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
|
||||
"\u207D" => "("
|
||||
|
||||
# ₍ [SUBSCRIPT LEFT PARENTHESIS]
|
||||
"\u208D" => "("
|
||||
|
||||
# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
|
||||
"\u2768" => "("
|
||||
|
||||
# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
|
||||
"\u276A" => "("
|
||||
|
||||
# ( [FULLWIDTH LEFT PARENTHESIS]
|
||||
"\uFF08" => "("
|
||||
|
||||
# ⸨ [LEFT DOUBLE PARENTHESIS]
|
||||
"\u2E28" => "(("
|
||||
|
||||
# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
|
||||
"\u207E" => ")"
|
||||
|
||||
# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
|
||||
"\u208E" => ")"
|
||||
|
||||
# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
|
||||
"\u2769" => ")"
|
||||
|
||||
# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
|
||||
"\u276B" => ")"
|
||||
|
||||
# ) [FULLWIDTH RIGHT PARENTHESIS]
|
||||
"\uFF09" => ")"
|
||||
|
||||
# ⸩ [RIGHT DOUBLE PARENTHESIS]
|
||||
"\u2E29" => "))"
|
||||
|
||||
# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
|
||||
"\u276C" => "<"
|
||||
|
||||
# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
|
||||
"\u2770" => "<"
|
||||
|
||||
# < [FULLWIDTH LESS-THAN SIGN]
|
||||
"\uFF1C" => "<"
|
||||
|
||||
# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
|
||||
"\u276D" => ">"
|
||||
|
||||
# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
|
||||
"\u2771" => ">"
|
||||
|
||||
# > [FULLWIDTH GREATER-THAN SIGN]
|
||||
"\uFF1E" => ">"
|
||||
|
||||
# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
|
||||
"\u2774" => "{"
|
||||
|
||||
# { [FULLWIDTH LEFT CURLY BRACKET]
|
||||
"\uFF5B" => "{"
|
||||
|
||||
# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
|
||||
"\u2775" => "}"
|
||||
|
||||
# } [FULLWIDTH RIGHT CURLY BRACKET]
|
||||
"\uFF5D" => "}"
|
||||
|
||||
# ⁺ [SUPERSCRIPT PLUS SIGN]
|
||||
"\u207A" => "+"
|
||||
|
||||
# ₊ [SUBSCRIPT PLUS SIGN]
|
||||
"\u208A" => "+"
|
||||
|
||||
# + [FULLWIDTH PLUS SIGN]
|
||||
"\uFF0B" => "+"
|
||||
|
||||
# ⁼ [SUPERSCRIPT EQUALS SIGN]
|
||||
"\u207C" => "="
|
||||
|
||||
# ₌ [SUBSCRIPT EQUALS SIGN]
|
||||
"\u208C" => "="
|
||||
|
||||
# = [FULLWIDTH EQUALS SIGN]
|
||||
"\uFF1D" => "="
|
||||
|
||||
# ! [FULLWIDTH EXCLAMATION MARK]
|
||||
"\uFF01" => "!"
|
||||
|
||||
# ‼ [DOUBLE EXCLAMATION MARK]
|
||||
"\u203C" => "!!"
|
||||
|
||||
# ⁉ [EXCLAMATION QUESTION MARK]
|
||||
"\u2049" => "!?"
|
||||
|
||||
# # [FULLWIDTH NUMBER SIGN]
|
||||
"\uFF03" => "#"
|
||||
|
||||
# $ [FULLWIDTH DOLLAR SIGN]
|
||||
"\uFF04" => "$"
|
||||
|
||||
# ⁒ [COMMERCIAL MINUS SIGN]
|
||||
"\u2052" => "%"
|
||||
|
||||
# % [FULLWIDTH PERCENT SIGN]
|
||||
"\uFF05" => "%"
|
||||
|
||||
# & [FULLWIDTH AMPERSAND]
|
||||
"\uFF06" => "&"
|
||||
|
||||
# ⁎ [LOW ASTERISK]
|
||||
"\u204E" => "*"
|
||||
|
||||
# * [FULLWIDTH ASTERISK]
|
||||
"\uFF0A" => "*"
|
||||
|
||||
# , [FULLWIDTH COMMA]
|
||||
"\uFF0C" => ","
|
||||
|
||||
# . [FULLWIDTH FULL STOP]
|
||||
"\uFF0E" => "."
|
||||
|
||||
# ⁄ [FRACTION SLASH]
|
||||
"\u2044" => "/"
|
||||
|
||||
# / [FULLWIDTH SOLIDUS]
|
||||
"\uFF0F" => "/"
|
||||
|
||||
# : [FULLWIDTH COLON]
|
||||
"\uFF1A" => ":"
|
||||
|
||||
# ⁏ [REVERSED SEMICOLON]
|
||||
"\u204F" => ";"
|
||||
|
||||
# ; [FULLWIDTH SEMICOLON]
|
||||
"\uFF1B" => ";"
|
||||
|
||||
# ? [FULLWIDTH QUESTION MARK]
|
||||
"\uFF1F" => "?"
|
||||
|
||||
# ⁇ [DOUBLE QUESTION MARK]
|
||||
"\u2047" => "??"
|
||||
|
||||
# ⁈ [QUESTION EXCLAMATION MARK]
|
||||
"\u2048" => "?!"
|
||||
|
||||
# @ [FULLWIDTH COMMERCIAL AT]
|
||||
"\uFF20" => "@"
|
||||
|
||||
# \ [FULLWIDTH REVERSE SOLIDUS]
|
||||
"\uFF3C" => "\\"
|
||||
|
||||
# ‸ [CARET]
|
||||
"\u2038" => "^"
|
||||
|
||||
# ^ [FULLWIDTH CIRCUMFLEX ACCENT]
|
||||
"\uFF3E" => "^"
|
||||
|
||||
# _ [FULLWIDTH LOW LINE]
|
||||
"\uFF3F" => "_"
|
||||
|
||||
# ⁓ [SWUNG DASH]
|
||||
"\u2053" => "~"
|
||||
|
||||
# ~ [FULLWIDTH TILDE]
|
||||
"\uFF5E" => "~"
|
||||
|
||||
################################################################
|
||||
# Below is the Perl script used to generate the above mappings #
|
||||
# from ASCIIFoldingFilter.java: #
|
||||
################################################################
|
||||
#
|
||||
# #!/usr/bin/perl
|
||||
#
|
||||
# use warnings;
|
||||
# use strict;
|
||||
#
|
||||
# my @source_chars = ();
|
||||
# my @source_char_descriptions = ();
|
||||
# my $target = '';
|
||||
#
|
||||
# while (<>) {
|
||||
# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
|
||||
# push @source_chars, $1;
|
||||
# push @source_char_descriptions, $2;
|
||||
# next;
|
||||
# }
|
||||
# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
|
||||
# $target .= $1;
|
||||
# next;
|
||||
# }
|
||||
# if (/break;/) {
|
||||
# $target = "\\\"" if ($target eq '"');
|
||||
# for my $source_char_num (0..$#source_chars) {
|
||||
# print "# $source_char_descriptions[$source_char_num]\n";
|
||||
# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
|
||||
# }
|
||||
# @source_chars = ();
|
||||
# @source_char_descriptions = ();
|
||||
# $target = '';
|
||||
# }
|
||||
# }
|
21
conf/solr/4/extras/protwords.txt
Normal file
21
conf/solr/4/extras/protwords.txt
Normal file
@ -0,0 +1,21 @@
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Use a protected word file to protect against the stemmer reducing two
|
||||
# unrelated words to the same base word.
|
||||
|
||||
# Some non-words that normally won't be encountered,
|
||||
# just to test that they won't be stemmed.
|
||||
dontstems
|
||||
zwhacky
|
||||
|
1823
conf/solr/4/extras/solrconfig.xml
Normal file
1823
conf/solr/4/extras/solrconfig.xml
Normal file
@ -0,0 +1,1823 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
For more details about configurations options that may appear in
|
||||
this file, see http://wiki.apache.org/solr/SolrConfigXml.
|
||||
-->
|
||||
<config>
|
||||
<!-- In all configuration below, a prefix of "solr." for class names
|
||||
is an alias that causes solr to search appropriate packages,
|
||||
including org.apache.solr.(search|update|request|core|analysis)
|
||||
|
||||
You may also specify a fully qualified Java classname if you
|
||||
have your own custom plugins.
|
||||
-->
|
||||
|
||||
<!-- Controls what version of Lucene various components of Solr
|
||||
adhere to. Generally, you want to use the latest version to
|
||||
get all bug fixes and improvements. It is highly recommended
|
||||
that you fully re-index after changing this setting as it can
|
||||
affect both how text is indexed and queried.
|
||||
-->
|
||||
<luceneMatchVersion>4.4</luceneMatchVersion>
|
||||
|
||||
<!-- <lib/> directives can be used to instruct Solr to load any Jars
|
||||
identified and use them to resolve any "plugins" specified in
|
||||
your solrconfig.xml or schema.xml (ie: Analyzers, Request
|
||||
Handlers, etc...).
|
||||
|
||||
All directories and paths are resolved relative to the
|
||||
instanceDir.
|
||||
|
||||
Please note that <lib/> directives are processed in the order
|
||||
that they appear in your solrconfig.xml file, and are "stacked"
|
||||
on top of each other when building a ClassLoader - so if you have
|
||||
plugin jars with dependencies on other jars, the "lower level"
|
||||
dependency jars should be loaded first.
|
||||
|
||||
If a "./lib" directory exists in your instanceDir, all files
|
||||
found in it are included as if you had used the following
|
||||
syntax...
|
||||
|
||||
<lib dir="./lib" />
|
||||
-->
|
||||
|
||||
<!-- A 'dir' option by itself adds any files found in the directory
|
||||
to the classpath, this is useful for including all jars in a
|
||||
directory.
|
||||
|
||||
When a 'regex' is specified in addition to a 'dir', only the
|
||||
files in that directory which completely match the regex
|
||||
(anchored on both ends) will be included.
|
||||
|
||||
If a 'dir' option (with or without a regex) is used and nothing
|
||||
is found that matches, a warning will be logged.
|
||||
|
||||
The examples below can be used to load some solr-contribs along
|
||||
with their external dependencies.
|
||||
-->
|
||||
<lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
|
||||
<lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||
|
||||
<lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
|
||||
<lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
|
||||
|
||||
<lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
|
||||
<lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
|
||||
|
||||
<lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
|
||||
<lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
|
||||
|
||||
<!-- an exact 'path' can be used instead of a 'dir' to specify a
|
||||
specific jar file. This will cause a serious error to be logged
|
||||
if it can't be loaded.
|
||||
-->
|
||||
<!--
|
||||
<lib path="../a-jar-that-does-not-exist.jar" />
|
||||
-->
|
||||
|
||||
<!-- Data Directory
|
||||
|
||||
Used to specify an alternate directory to hold all index data
|
||||
other than the default ./data under the Solr home. If
|
||||
replication is in use, this should match the replication
|
||||
configuration.
|
||||
-->
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
||||
|
||||
<!-- The DirectoryFactory to use for indexes.
|
||||
|
||||
solr.StandardDirectoryFactory is filesystem
|
||||
based and tries to pick the best implementation for the current
|
||||
JVM and platform. solr.NRTCachingDirectoryFactory, the default,
|
||||
wraps solr.StandardDirectoryFactory and caches small files in memory
|
||||
for better NRT performance.
|
||||
|
||||
One can force a particular implementation via solr.MMapDirectoryFactory,
|
||||
solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
|
||||
|
||||
solr.RAMDirectoryFactory is memory based, not
|
||||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
index format, but hooks into the schema to provide per-field customization of
|
||||
the postings lists and per-document values in the fieldType element
|
||||
(postingsFormat/docValuesFormat). Note that most of the alternative implementations
|
||||
are experimental, so if you choose to customize the index format, it's a good
|
||||
idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
|
||||
before upgrading to a newer version to avoid unnecessary reindexing.
|
||||
-->
|
||||
<codecFactory class="solr.SchemaCodecFactory"/>
|
||||
|
||||
<!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
|
||||
|
||||
<schemaFactory class="ManagedIndexSchemaFactory">
|
||||
<bool name="mutable">true</bool>
|
||||
<str name="managedSchemaResourceName">managed-schema</str>
|
||||
</schemaFactory>
|
||||
|
||||
When ManagedIndexSchemaFactory is specified, Solr will load the schema from
|
||||
the resource named in 'managedSchemaResourceName', rather than from schema.xml.
|
||||
Note that the managed schema resource CANNOT be named schema.xml. If the managed
|
||||
schema does not exist, Solr will create it after reading schema.xml, then rename
|
||||
'schema.xml' to 'schema.xml.bak'.
|
||||
|
||||
Do NOT hand edit the managed schema - external modifications will be ignored and
|
||||
overwritten as a result of schema modification REST API calls.
|
||||
|
||||
When ManagedIndexSchemaFactory is specified with mutable = true, schema
|
||||
modification REST API calls will be allowed; otherwise, error responses will be
|
||||
sent back for these requests.
|
||||
-->
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Index Config - These settings control low-level behavior of indexing
|
||||
Most example settings here show the default value, but are commented
|
||||
out, to more easily see where customizations have been made.
|
||||
|
||||
Note: This replaces <indexDefaults> and <mainIndex> from older versions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
||||
<indexConfig>
|
||||
<!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
|
||||
LimitTokenCountFilterFactory in your fieldType definition. E.g.
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
|
||||
-->
|
||||
<!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
|
||||
<!-- <writeLockTimeout>1000</writeLockTimeout> -->
|
||||
|
||||
<!-- The maximum number of simultaneous threads that may be
|
||||
indexing documents at once in IndexWriter; if more than this
|
||||
many threads arrive they will wait for others to finish.
|
||||
Default in Solr/Lucene is 8. -->
|
||||
<!-- <maxIndexingThreads>8</maxIndexingThreads> -->
|
||||
|
||||
<!-- Expert: Enabling compound file will use fewer files for the index,
|
||||
using fewer file descriptors at the expense of a performance decrease.
|
||||
Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
|
||||
<!-- <useCompoundFile>false</useCompoundFile> -->
|
||||
|
||||
<!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
|
||||
indexing for buffering added documents and deletions before they are
|
||||
flushed to the Directory.
|
||||
maxBufferedDocs sets a limit on the number of documents buffered
|
||||
before flushing.
|
||||
If both ramBufferSizeMB and maxBufferedDocs are set, then
|
||||
Lucene will flush based on whichever limit is hit first.
|
||||
The default is 100 MB. -->
|
||||
<!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
|
||||
<!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
|
||||
|
||||
<!-- Expert: Merge Policy
|
||||
The Merge Policy in Lucene controls how merging of segments is done.
|
||||
The default since Solr/Lucene 3.3 is TieredMergePolicy.
|
||||
The default since Lucene 2.3 was the LogByteSizeMergePolicy,
|
||||
Even older versions of Lucene used LogDocMergePolicy.
|
||||
-->
|
||||
<!--
|
||||
<mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
|
||||
<int name="maxMergeAtOnce">10</int>
|
||||
<int name="segmentsPerTier">10</int>
|
||||
</mergePolicy>
|
||||
-->
|
||||
|
||||
<!-- Merge Factor
|
||||
The merge factor controls how many segments will get merged at a time.
|
||||
For TieredMergePolicy, mergeFactor is a convenience parameter which
|
||||
will set both MaxMergeAtOnce and SegmentsPerTier at once.
|
||||
For LogByteSizeMergePolicy, mergeFactor decides how many new segments
|
||||
will be allowed before they are merged into one.
|
||||
Default is 10 for both merge policies.
|
||||
-->
|
||||
<!--
|
||||
<mergeFactor>10</mergeFactor>
|
||||
-->
|
||||
|
||||
<!-- Expert: Merge Scheduler
|
||||
The Merge Scheduler in Lucene controls how merges are
|
||||
performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
|
||||
can perform merges in the background using separate threads.
|
||||
The SerialMergeScheduler (Lucene 2.2 default) does not.
|
||||
-->
|
||||
<!--
|
||||
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
|
||||
-->
|
||||
|
||||
<!-- LockFactory
|
||||
|
||||
This option specifies which Lucene LockFactory implementation
|
||||
to use.
|
||||
|
||||
single = SingleInstanceLockFactory - suggested for a
|
||||
read-only index or when there is no possibility of
|
||||
another process trying to modify the index.
|
||||
native = NativeFSLockFactory - uses OS native file locking.
|
||||
Do not use when multiple solr webapps in the same
|
||||
JVM are attempting to share a single index.
|
||||
simple = SimpleFSLockFactory - uses a plain file for locking
|
||||
|
||||
Defaults: 'native' is default for Solr3.6 and later, otherwise
|
||||
'simple' is the default
|
||||
|
||||
More details on the nuances of each LockFactory...
|
||||
http://wiki.apache.org/lucene-java/AvailableLockFactories
|
||||
-->
|
||||
<lockType>${solr.lock.type:native}</lockType>
|
||||
|
||||
<!-- Unlock On Startup
|
||||
|
||||
If true, unlock any held write or commit locks on startup.
|
||||
This defeats the locking mechanism that allows multiple
|
||||
processes to safely access a lucene index, and should be used
|
||||
with care. Default is "false".
|
||||
|
||||
This is not needed if lock type is 'single'
|
||||
-->
|
||||
<!--
|
||||
<unlockOnStartup>false</unlockOnStartup>
|
||||
-->
|
||||
|
||||
<!-- Expert: Controls how often Lucene loads terms into memory
|
||||
Default is 128 and is likely good for most everyone.
|
||||
-->
|
||||
<!-- <termIndexInterval>128</termIndexInterval> -->
|
||||
|
||||
<!-- If true, IndexReaders will be reopened (often more efficient)
|
||||
instead of closed and then opened. Default: true
|
||||
-->
|
||||
<!--
|
||||
<reopenReaders>true</reopenReaders>
|
||||
-->
|
||||
|
||||
<!-- Commit Deletion Policy
|
||||
Custom deletion policies can be specified here. The class must
|
||||
implement org.apache.lucene.index.IndexDeletionPolicy.
|
||||
|
||||
The default Solr IndexDeletionPolicy implementation supports
|
||||
deleting index commit points on number of commits, age of
|
||||
commit point and optimized status.
|
||||
|
||||
The latest commit point should always be preserved regardless
|
||||
of the criteria.
|
||||
-->
|
||||
<!--
|
||||
<deletionPolicy class="solr.SolrDeletionPolicy">
|
||||
-->
|
||||
<!-- The number of commit points to be kept -->
|
||||
<!-- <str name="maxCommitsToKeep">1</str> -->
|
||||
<!-- The number of optimized commit points to be kept -->
|
||||
<!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
|
||||
<!--
|
||||
Delete all commit points once they have reached the given age.
|
||||
Supports DateMathParser syntax e.g.
|
||||
-->
|
||||
<!--
|
||||
<str name="maxCommitAge">30MINUTES</str>
|
||||
<str name="maxCommitAge">1DAY</str>
|
||||
-->
|
||||
<!--
|
||||
</deletionPolicy>
|
||||
-->
|
||||
|
||||
<!-- Lucene Infostream
|
||||
|
||||
To aid in advanced debugging, Lucene provides an "InfoStream"
|
||||
of detailed information when indexing.
|
||||
|
||||
Setting the value to true will instruct the underlying Lucene
|
||||
IndexWriter to write its info stream to solr's log. By default,
|
||||
this is enabled here, and controlled through log4j.properties.
|
||||
-->
|
||||
<infoStream>true</infoStream>
|
||||
</indexConfig>
|
||||
|
||||
|
||||
<!-- JMX
|
||||
|
||||
This example enables JMX if and only if an existing MBeanServer
|
||||
is found, use this if you want to configure JMX through JVM
|
||||
parameters. Remove this to disable exposing Solr configuration
|
||||
and statistics to JMX.
|
||||
|
||||
For more details see http://wiki.apache.org/solr/SolrJmx
|
||||
-->
|
||||
<jmx />
|
||||
<!-- If you want to connect to a particular server, specify the
|
||||
agentId
|
||||
-->
|
||||
<!-- <jmx agentId="myAgent" /> -->
|
||||
<!-- If you want to start a new MBeanServer, specify the serviceUrl -->
|
||||
<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
|
||||
-->
|
||||
|
||||
<!-- The default high-performance update handler -->
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
|
||||
<!-- Enables a transaction log, used for real-time get, durability, and
|
||||
solr cloud replica recovery. The log can grow as big as
|
||||
uncommitted changes to the index, so use of a hard autoCommit
|
||||
is recommended (see below).
|
||||
"dir" - the target directory for transaction logs, defaults to the
|
||||
solr data directory. -->
|
||||
<updateLog>
|
||||
<str name="dir">${solr.ulog.dir:}</str>
|
||||
</updateLog>
|
||||
|
||||
<!-- AutoCommit
|
||||
|
||||
Perform a hard commit automatically under certain conditions.
|
||||
Instead of enabling autoCommit, consider using "commitWithin"
|
||||
when adding documents.
|
||||
|
||||
http://wiki.apache.org/solr/UpdateXmlMessages
|
||||
|
||||
maxDocs - Maximum number of documents to add since the last
|
||||
commit before automatically triggering a new commit.
|
||||
|
||||
maxTime - Maximum amount of time in ms that is allowed to pass
|
||||
since a document was added before automatically
|
||||
triggering a new commit.
|
||||
openSearcher - if false, the commit causes recent index changes
|
||||
to be flushed to stable storage, but does not cause a new
|
||||
searcher to be opened to make those changes visible.
|
||||
|
||||
If the updateLog is enabled, then it's highly recommended to
|
||||
have some sort of hard autoCommit to limit the log size.
|
||||
-->
|
||||
<autoCommit>
|
||||
<maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
|
||||
<openSearcher>false</openSearcher>
|
||||
</autoCommit>
|
||||
|
||||
<!-- softAutoCommit is like autoCommit except it causes a
|
||||
'soft' commit which only ensures that changes are visible
|
||||
but does not ensure that data is synced to disk. This is
|
||||
faster and more near-realtime friendly than a hard commit.
|
||||
-->
|
||||
|
||||
<autoSoftCommit>
|
||||
<maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
|
||||
</autoSoftCommit>
|
||||
|
||||
<!-- Update Related Event Listeners
|
||||
|
||||
Various IndexWriter related events can trigger Listeners to
|
||||
take actions.
|
||||
|
||||
postCommit - fired after every commit or optimize command
|
||||
postOptimize - fired after every optimize command
|
||||
-->
|
||||
<!-- The RunExecutableListener executes an external command from a
|
||||
hook such as postCommit or postOptimize.
|
||||
|
||||
exe - the name of the executable to run
|
||||
dir - dir to use as the current working directory. (default=".")
|
||||
wait - the calling thread waits until the executable returns.
|
||||
(default="true")
|
||||
args - the arguments to pass to the program. (default is none)
|
||||
env - environment variables to set. (default is none)
|
||||
-->
|
||||
<!-- This example shows how RunExecutableListener could be used
|
||||
with the script based replication...
|
||||
http://wiki.apache.org/solr/CollectionDistribution
|
||||
-->
|
||||
<!--
|
||||
<listener event="postCommit" class="solr.RunExecutableListener">
|
||||
<str name="exe">solr/bin/snapshooter</str>
|
||||
<str name="dir">.</str>
|
||||
<bool name="wait">true</bool>
|
||||
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
|
||||
<arr name="env"> <str>MYVAR=val1</str> </arr>
|
||||
</listener>
|
||||
-->
|
||||
|
||||
</updateHandler>
|
||||
|
||||
<!-- IndexReaderFactory
|
||||
|
||||
Use the following format to specify a custom IndexReaderFactory,
|
||||
which allows for alternate IndexReader implementations.
|
||||
|
||||
** Experimental Feature **
|
||||
|
||||
Please note - Using a custom IndexReaderFactory may prevent
|
||||
certain other features from working. The API to
|
||||
IndexReaderFactory may change without warning or may even be
|
||||
removed from future releases if the problems cannot be
|
||||
resolved.
|
||||
|
||||
|
||||
** Features that may not work with custom IndexReaderFactory **
|
||||
|
||||
The ReplicationHandler assumes a disk-resident index. Using a
|
||||
custom IndexReader implementation may cause incompatibility
|
||||
with ReplicationHandler and may cause replication to not work
|
||||
correctly. See SOLR-1366 for details.
|
||||
|
||||
-->
|
||||
<!--
|
||||
<indexReaderFactory name="IndexReaderFactory" class="package.class">
|
||||
<str name="someArg">Some Value</str>
|
||||
</indexReaderFactory >
|
||||
-->
|
||||
<!-- By explicitly declaring the Factory, the termIndexDivisor can
|
||||
be specified.
|
||||
-->
|
||||
<!--
|
||||
<indexReaderFactory name="IndexReaderFactory"
|
||||
class="solr.StandardIndexReaderFactory">
|
||||
<int name="setTermIndexDivisor">12</int>
|
||||
</indexReaderFactory >
|
||||
-->
|
||||
|
||||
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Query section - these settings control query time things like caches
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
||||
<query>
|
||||
<!-- Max Boolean Clauses
|
||||
|
||||
Maximum number of clauses in each BooleanQuery, an exception
|
||||
is thrown if exceeded.
|
||||
|
||||
** WARNING **
|
||||
|
||||
This option actually modifies a global Lucene property that
|
||||
will affect all SolrCores. If multiple solrconfig.xml files
|
||||
disagree on this property, the value at any given moment will
|
||||
be based on the last SolrCore to be initialized.
|
||||
|
||||
-->
|
||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||
|
||||
|
||||
<!-- Solr Internal Query Caches
|
||||
|
||||
There are two implementations of cache available for Solr,
|
||||
LRUCache, based on a synchronized LinkedHashMap, and
|
||||
FastLRUCache, based on a ConcurrentHashMap.
|
||||
|
||||
FastLRUCache has faster gets and slower puts in single
|
||||
threaded operation and thus is generally faster than LRUCache
|
||||
when the hit ratio of the cache is high (> 75%), and may be
|
||||
faster under other scenarios on multi-cpu systems.
|
||||
-->
|
||||
|
||||
<!-- Filter Cache
|
||||
|
||||
Cache used by SolrIndexSearcher for filters (DocSets),
|
||||
unordered sets of *all* documents that match a query. When a
|
||||
new searcher is opened, its caches may be prepopulated or
|
||||
"autowarmed" using data from caches in the old searcher.
|
||||
autowarmCount is the number of items to prepopulate. For
|
||||
LRUCache, the autowarmed items will be the most recently
|
||||
accessed items.
|
||||
|
||||
Parameters:
|
||||
class - the SolrCache implementation to use
|
||||
(LRUCache or FastLRUCache)
|
||||
size - the maximum number of entries in the cache
|
||||
initialSize - the initial capacity (number of entries) of
|
||||
the cache. (see java.util.HashMap)
|
||||
autowarmCount - the number of entries to prepopulate from
|
||||
an old cache.
|
||||
-->
|
||||
<filterCache class="solr.FastLRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="0"/>
|
||||
|
||||
<!-- Query Result Cache
|
||||
|
||||
Caches results of searches - ordered lists of document ids
|
||||
(DocList) based on a query, a sort, and the range of documents requested.
|
||||
-->
|
||||
<queryResultCache class="solr.LRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="0"/>
|
||||
|
||||
<!-- Document Cache
|
||||
|
||||
Caches Lucene Document objects (the stored fields for each
|
||||
document). Since Lucene internal document ids are transient,
|
||||
this cache will not be autowarmed.
|
||||
-->
|
||||
<documentCache class="solr.LRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="0"/>
|
||||
|
||||
<!-- Field Value Cache
|
||||
|
||||
Cache used to hold field values that are quickly accessible
|
||||
by document id. The fieldValueCache is created by default
|
||||
even if not configured here.
|
||||
-->
|
||||
<!--
|
||||
<fieldValueCache class="solr.FastLRUCache"
|
||||
size="512"
|
||||
autowarmCount="128"
|
||||
showItems="32" />
|
||||
-->
|
||||
|
||||
<!-- Custom Cache
|
||||
|
||||
Example of a generic cache. These caches may be accessed by
|
||||
name through SolrIndexSearcher.getCache(), cacheLookup(), and
|
||||
cacheInsert(). The purpose is to enable easy caching of
|
||||
user/application level data. The regenerator argument should
|
||||
be specified as an implementation of solr.CacheRegenerator
|
||||
if autowarming is desired.
|
||||
-->
|
||||
<!--
|
||||
<cache name="myUserCache"
|
||||
class="solr.LRUCache"
|
||||
size="4096"
|
||||
initialSize="1024"
|
||||
autowarmCount="1024"
|
||||
regenerator="com.mycompany.MyRegenerator"
|
||||
/>
|
||||
-->
|
||||
|
||||
|
||||
<!-- Lazy Field Loading
|
||||
|
||||
If true, stored fields that are not requested will be loaded
|
||||
lazily. This can result in a significant speed improvement
|
||||
if the usual case is to not load all stored fields,
|
||||
especially if the skipped fields are large compressed text
|
||||
fields.
|
||||
-->
|
||||
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||
|
||||
<!-- Use Filter For Sorted Query
|
||||
|
||||
A possible optimization that attempts to use a filter to
|
||||
satisfy a search. If the requested sort does not include
|
||||
score, then the filterCache will be checked for a filter
|
||||
matching the query. If found, the filter will be used as the
|
||||
source of document ids, and then the sort will be applied to
|
||||
that.
|
||||
|
||||
For most situations, this will not be useful unless you
|
||||
frequently get the same search repeatedly with different sort
|
||||
options, and none of them ever use "score"
|
||||
-->
|
||||
<!--
|
||||
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||
-->
|
||||
|
||||
<!-- Result Window Size
|
||||
|
||||
An optimization for use with the queryResultCache. When a search
|
||||
is requested, a superset of the requested number of document ids
|
||||
are collected. For example, if a search for a particular query
|
||||
requests matching documents 10 through 19, and queryWindowSize is 50,
|
||||
then documents 0 through 49 will be collected and cached. Any further
|
||||
requests in that range can be satisfied via the cache.
|
||||
-->
|
||||
<queryResultWindowSize>20</queryResultWindowSize>
|
||||
|
||||
<!-- Maximum number of documents to cache for any entry in the
|
||||
queryResultCache.
|
||||
-->
|
||||
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||
|
||||
<!-- Query Related Event Listeners
|
||||
|
||||
Various IndexSearcher related events can trigger Listeners to
|
||||
take actions.
|
||||
|
||||
newSearcher - fired whenever a new searcher is being prepared
|
||||
and there is a current searcher handling requests (aka
|
||||
registered). It can be used to prime certain caches to
|
||||
prevent long request times for certain requests.
|
||||
|
||||
firstSearcher - fired whenever a new searcher is being
|
||||
prepared but there is no current registered searcher to handle
|
||||
requests or to gain autowarming data from.
|
||||
|
||||
|
||||
-->
|
||||
<!-- QuerySenderListener takes an array of NamedList and executes a
|
||||
local query request for each NamedList in sequence.
|
||||
-->
|
||||
<listener event="newSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<!--
|
||||
<lst><str name="q">solr</str><str name="sort">price asc</str></lst>
|
||||
<lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
|
||||
-->
|
||||
</arr>
|
||||
</listener>
|
||||
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<lst>
|
||||
<str name="q">static firstSearcher warming in solrconfig.xml</str>
|
||||
</lst>
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!-- Use Cold Searcher
|
||||
|
||||
If a search request comes in and there is no current
|
||||
registered searcher, then immediately register the still
|
||||
warming searcher and use it. If "false" then all requests
|
||||
will block until the first searcher is done warming.
|
||||
-->
|
||||
<useColdSearcher>false</useColdSearcher>
|
||||
|
||||
<!-- Max Warming Searchers
|
||||
|
||||
Maximum number of searchers that may be warming in the
|
||||
background concurrently. An error is returned if this limit
|
||||
is exceeded.
|
||||
|
||||
Recommend values of 1-2 for read-only slaves, higher for
|
||||
masters w/o cache warming.
|
||||
-->
|
||||
<maxWarmingSearchers>2</maxWarmingSearchers>
|
||||
|
||||
</query>
|
||||
|
||||
|
||||
<!-- Request Dispatcher
|
||||
|
||||
This section contains instructions for how the SolrDispatchFilter
|
||||
should behave when processing requests for this SolrCore.
|
||||
|
||||
handleSelect is a legacy option that affects the behavior of requests
|
||||
such as /select?qt=XXX
|
||||
|
||||
handleSelect="true" will cause the SolrDispatchFilter to process
|
||||
the request and dispatch the query to a handler specified by the
|
||||
"qt" param, assuming "/select" isn't already registered.
|
||||
|
||||
handleSelect="false" will cause the SolrDispatchFilter to
|
||||
ignore "/select" requests, resulting in a 404 unless a handler
|
||||
is explicitly registered with the name "/select"
|
||||
|
||||
handleSelect="true" is not recommended for new users, but is the default
|
||||
for backwards compatibility
|
||||
-->
|
||||
<requestDispatcher handleSelect="false" >
|
||||
<!-- Request Parsing
|
||||
|
||||
These settings indicate how Solr Requests may be parsed, and
|
||||
what restrictions may be placed on the ContentStreams from
|
||||
those requests
|
||||
|
||||
enableRemoteStreaming - enables use of the stream.file
|
||||
and stream.url parameters for specifying remote streams.
|
||||
|
||||
multipartUploadLimitInKB - specifies the max size (in KiB) of
|
||||
Multipart File Uploads that Solr will allow in a Request.
|
||||
|
||||
formdataUploadLimitInKB - specifies the max size (in KiB) of
|
||||
form data (application/x-www-form-urlencoded) sent via
|
||||
POST. You can use POST to pass request parameters not
|
||||
fitting into the URL.
|
||||
|
||||
addHttpRequestToContext - if set to true, it will instruct
|
||||
the requestParsers to include the original HttpServletRequest
|
||||
object in the context map of the SolrQueryRequest under the
|
||||
key "httpRequest". It will not be used by any of the existing
|
||||
Solr components, but may be useful when developing custom
|
||||
plugins.
|
||||
|
||||
*** WARNING ***
|
||||
The settings below authorize Solr to fetch remote files. You
|
||||
should make sure your system has some authentication before
|
||||
using enableRemoteStreaming="true"
|
||||
|
||||
-->
|
||||
<requestParsers enableRemoteStreaming="true"
|
||||
multipartUploadLimitInKB="2048000"
|
||||
formdataUploadLimitInKB="2048"
|
||||
addHttpRequestToContext="false"/>
|
||||
|
||||
<!-- HTTP Caching
|
||||
|
||||
Set HTTP caching related parameters (for proxy caches and clients).
|
||||
|
||||
The options below instruct Solr not to output any HTTP Caching
|
||||
related headers
|
||||
-->
|
||||
<httpCaching never304="true" />
|
||||
<!-- If you include a <cacheControl> directive, it will be used to
|
||||
generate a Cache-Control header (as well as an Expires header
|
||||
if the value contains "max-age=")
|
||||
|
||||
By default, no Cache-Control header is generated.
|
||||
|
||||
You can use the <cacheControl> option even if you have set
|
||||
never304="true"
|
||||
-->
|
||||
<!--
|
||||
<httpCaching never304="true" >
|
||||
<cacheControl>max-age=30, public</cacheControl>
|
||||
</httpCaching>
|
||||
-->
|
||||
<!-- To enable Solr to respond with automatically generated HTTP
|
||||
Caching headers, and to respond to Cache Validation requests
|
||||
correctly, set the value of never304="false"
|
||||
|
||||
This will cause Solr to generate Last-Modified and ETag
|
||||
headers based on the properties of the Index.
|
||||
|
||||
The following options can also be specified to affect the
|
||||
values of these headers...
|
||||
|
||||
lastModFrom - the default value is "openTime" which means the
|
||||
Last-Modified value (and validation against If-Modified-Since
|
||||
requests) will all be relative to when the current Searcher
|
||||
was opened. You can change it to lastModFrom="dirLastMod" if
|
||||
you want the value to exactly correspond to when the physical
|
||||
index was last modified.
|
||||
|
||||
etagSeed="..." is an option you can change to force the ETag
|
||||
header (and validation against If-None-Match requests) to be
|
||||
different even if the index has not changed (ie: when making
|
||||
significant changes to your config file)
|
||||
|
||||
(lastModFrom and etagSeed are both ignored if you use
|
||||
the never304="true" option)
|
||||
-->
|
||||
<!--
|
||||
<httpCaching lastModFrom="openTime"
|
||||
etagSeed="Solr">
|
||||
<cacheControl>max-age=30, public</cacheControl>
|
||||
</httpCaching>
|
||||
-->
|
||||
</requestDispatcher>
|
||||
|
||||
<!-- Request Handlers
|
||||
|
||||
http://wiki.apache.org/solr/SolrRequestHandler
|
||||
|
||||
Incoming queries will be dispatched to a specific handler by name
|
||||
based on the path specified in the request.
|
||||
|
||||
Legacy behavior: If the request path uses "/select" but no Request
|
||||
Handler has that name, and if handleSelect="true" has been specified in
|
||||
the requestDispatcher, then the Request Handler is dispatched based on
|
||||
the qt parameter. Handlers without a leading '/' are accessed this way
|
||||
like so: http://host/app/[core/]select?qt=name If no qt is
|
||||
given, then the requestHandler that declares default="true" will be
|
||||
used or the one named "standard".
|
||||
|
||||
If a Request Handler is declared with startup="lazy", then it will
|
||||
not be initialized until the first request that uses it.
|
||||
|
||||
-->
|
||||
<!-- SearchHandler
|
||||
|
||||
http://wiki.apache.org/solr/SearchHandler
|
||||
|
||||
For processing Search Queries, the primary Request Handler
|
||||
provided with Solr is "SearchHandler". It delegates to a sequence
|
||||
of SearchComponents (see below) and supports distributed
|
||||
queries across multiple shards
|
||||
-->
|
||||
<requestHandler name="/select" class="solr.SearchHandler">
|
||||
<!-- default values for query parameters can be specified, these
|
||||
will be overridden by parameters in the request
|
||||
-->
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<int name="rows">10</int>
|
||||
<str name="df">_text</str>
|
||||
</lst>
|
||||
<!-- In addition to defaults, "appends" params can be specified
|
||||
to identify values which should be appended to the list of
|
||||
multi-val params from the query (or the existing "defaults").
|
||||
-->
|
||||
<!-- In this example, the param "fq=instock:true" would be appended to
|
||||
any query time fq params the user may specify, as a mechanism for
|
||||
partitioning the index, independent of any user selected filtering
|
||||
that may also be desired (perhaps as a result of faceted searching).
|
||||
|
||||
NOTE: there is *absolutely* nothing a client can do to prevent these
|
||||
"appends" values from being used, so don't use this mechanism
|
||||
unless you are sure you always want it.
|
||||
-->
|
||||
<!--
|
||||
<lst name="appends">
|
||||
<str name="fq">inStock:true</str>
|
||||
</lst>
|
||||
-->
|
||||
<!-- "invariants" are a way of letting the Solr maintainer lock down
|
||||
the options available to Solr clients. Any params values
|
||||
specified here are used regardless of what values may be specified
|
||||
in either the query, the "defaults", or the "appends" params.
|
||||
|
||||
In this example, the facet.field and facet.query params would
|
||||
be fixed, limiting the facets clients can use. Faceting is
|
||||
not turned on by default - but if the client does specify
|
||||
facet=true in the request, these are the only facets they
|
||||
will be able to see counts for; regardless of what other
|
||||
facet.field or facet.query params they may specify.
|
||||
|
||||
NOTE: there is *absolutely* nothing a client can do to prevent these
|
||||
"invariants" values from being used, so don't use this mechanism
|
||||
unless you are sure you always want it.
|
||||
-->
|
||||
<!--
|
||||
<lst name="invariants">
|
||||
<str name="facet.field">cat</str>
|
||||
<str name="facet.field">manu_exact</str>
|
||||
<str name="facet.query">price:[* TO 500]</str>
|
||||
<str name="facet.query">price:[500 TO *]</str>
|
||||
</lst>
|
||||
-->
|
||||
<!-- If the default list of SearchComponents is not desired, that
|
||||
list can either be overridden completely, or components can be
|
||||
prepended or appended to the default list. (see below)
|
||||
-->
|
||||
<!--
|
||||
<arr name="components">
|
||||
<str>nameOfCustomComponent1</str>
|
||||
<str>nameOfCustomComponent2</str>
|
||||
</arr>
|
||||
-->
|
||||
</requestHandler>
|
||||
|
||||
<!-- A request handler that returns indented JSON by default -->
|
||||
<requestHandler name="/query" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="wt">json</str>
|
||||
<str name="indent">true</str>
|
||||
<str name="df">_text</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- realtime get handler, guaranteed to return the latest stored fields of
|
||||
any document, without the need to commit or open a new searcher. The
|
||||
current implementation relies on the updateLog feature being enabled. -->
|
||||
<requestHandler name="/get" class="solr.RealTimeGetHandler">
|
||||
<lst name="defaults">
|
||||
<str name="omitHeader">true</str>
|
||||
<str name="wt">json</str>
|
||||
<str name="indent">true</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- A Robust Example
|
||||
|
||||
This example SearchHandler declaration shows off usage of the
|
||||
SearchHandler with many defaults declared
|
||||
|
||||
Note that multiple instances of the same Request Handler
|
||||
(SearchHandler) can be registered with different
|
||||
names (and different init parameters)
|
||||
-->
|
||||
<requestHandler name="/browse" class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
|
||||
<!-- VelocityResponseWriter settings -->
|
||||
<str name="wt">velocity</str>
|
||||
<str name="v.template">browse</str>
|
||||
<str name="v.layout">layout</str>
|
||||
<str name="title">Solritas</str>
|
||||
|
||||
<!-- Query settings -->
|
||||
<str name="defType">edismax</str>
|
||||
<str name="qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
|
||||
</str>
|
||||
<str name="df">_text</str>
|
||||
<str name="mm">100%</str>
|
||||
<str name="q.alt">*:*</str>
|
||||
<str name="rows">10</str>
|
||||
<str name="fl">*,score</str>
|
||||
|
||||
<str name="mlt.qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||
title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
|
||||
</str>
|
||||
<str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
|
||||
<int name="mlt.count">3</int>
|
||||
|
||||
<!-- Faceting defaults -->
|
||||
<str name="facet">on</str>
|
||||
<str name="facet.field">cat</str>
|
||||
<str name="facet.field">manu_exact</str>
|
||||
<str name="facet.field">content_type</str>
|
||||
<str name="facet.field">author_s</str>
|
||||
<str name="facet.query">ipod</str>
|
||||
<str name="facet.query">GB</str>
|
||||
<str name="facet.mincount">1</str>
|
||||
<str name="facet.pivot">cat,inStock</str>
|
||||
<str name="facet.range.other">after</str>
|
||||
<str name="facet.range">price</str>
|
||||
<int name="f.price.facet.range.start">0</int>
|
||||
<int name="f.price.facet.range.end">600</int>
|
||||
<int name="f.price.facet.range.gap">50</int>
|
||||
<str name="facet.range">popularity</str>
|
||||
<int name="f.popularity.facet.range.start">0</int>
|
||||
<int name="f.popularity.facet.range.end">10</int>
|
||||
<int name="f.popularity.facet.range.gap">3</int>
|
||||
<str name="facet.range">manufacturedate_dt</str>
|
||||
<str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
|
||||
<str name="f.manufacturedate_dt.facet.range.end">NOW</str>
|
||||
<str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
|
||||
<str name="f.manufacturedate_dt.facet.range.other">before</str>
|
||||
<str name="f.manufacturedate_dt.facet.range.other">after</str>
|
||||
|
||||
<!-- Highlighting defaults -->
|
||||
<str name="hl">on</str>
|
||||
<str name="hl.fl">content features title name</str>
|
||||
<str name="hl.encoder">html</str>
|
||||
<str name="hl.simple.pre"><![CDATA[<b>]]></str>
|
||||
<str name="hl.simple.post"><![CDATA[</b>]]></str>
|
||||
<str name="f.title.hl.fragsize">0</str>
|
||||
<str name="f.title.hl.alternateField">title</str>
|
||||
<str name="f.name.hl.fragsize">0</str>
|
||||
<str name="f.name.hl.alternateField">name</str>
|
||||
<str name="f.content.hl.snippets">3</str>
|
||||
<str name="f.content.hl.fragsize">200</str>
|
||||
<str name="f.content.hl.alternateField">content</str>
|
||||
<str name="f.content.hl.maxAlternateFieldLength">750</str>
|
||||
|
||||
<!-- Spell checking defaults -->
|
||||
<str name="spellcheck">on</str>
|
||||
<str name="spellcheck.extendedResults">false</str>
|
||||
<str name="spellcheck.count">5</str>
|
||||
<str name="spellcheck.alternativeTermCount">2</str>
|
||||
<str name="spellcheck.maxResultsForSuggest">5</str>
|
||||
<str name="spellcheck.collate">true</str>
|
||||
<str name="spellcheck.collateExtendedResults">true</str>
|
||||
<str name="spellcheck.maxCollationTries">5</str>
|
||||
<str name="spellcheck.maxCollations">3</str>
|
||||
</lst>
|
||||
|
||||
<!-- append spellchecking to our list of components -->
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- Update Request Handler.
|
||||
|
||||
http://wiki.apache.org/solr/UpdateXmlMessages
|
||||
|
||||
The canonical Request Handler for Modifying the Index through
|
||||
commands specified using XML, JSON, CSV, or JAVABIN
|
||||
|
||||
Note: Since solr1.1 requestHandlers require a valid content
|
||||
type header if posted in the body. For example, curl now
|
||||
requires: -H 'Content-type:text/xml; charset=utf-8'
|
||||
|
||||
To override the request content type and force a specific
|
||||
Content-type, use the request parameter:
|
||||
?update.contentType=text/csv
|
||||
|
||||
This handler will pick a response format to match the input
|
||||
if the 'wt' parameter is not explicit
|
||||
-->
|
||||
<requestHandler name="/update" class="solr.UpdateRequestHandler">
|
||||
<!-- See below for information on defining
|
||||
updateRequestProcessorChains that can be used by name
|
||||
on each Update Request
|
||||
-->
|
||||
<!--
|
||||
<lst name="defaults">
|
||||
<str name="update.chain">dedupe</str>
|
||||
</lst>
|
||||
-->
|
||||
</requestHandler>
|
||||
|
||||
<!-- for back compat with clients using /update/json and /update/csv -->
|
||||
<requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler">
|
||||
<lst name="defaults">
|
||||
<str name="stream.contentType">application/json</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
<requestHandler name="/update/csv" class="solr.CSVRequestHandler">
|
||||
<lst name="defaults">
|
||||
<str name="stream.contentType">application/csv</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Solr Cell Update Request Handler
|
||||
|
||||
http://wiki.apache.org/solr/ExtractingRequestHandler
|
||||
|
||||
-->
|
||||
<requestHandler name="/update/extract"
|
||||
startup="lazy"
|
||||
class="solr.extraction.ExtractingRequestHandler" >
|
||||
<lst name="defaults">
|
||||
<str name="lowernames">true</str>
|
||||
<str name="uprefix">ignored_</str>
|
||||
|
||||
<!-- capture link hrefs but ignore div attributes -->
|
||||
<str name="captureAttr">true</str>
|
||||
<str name="fmap.a">links</str>
|
||||
<str name="fmap.div">ignored_</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- Field Analysis Request Handler
|
||||
|
||||
RequestHandler that provides much the same functionality as
|
||||
analysis.jsp. Provides the ability to specify multiple field
|
||||
types and field names in the same request and outputs
|
||||
index-time and query-time analysis for each of them.
|
||||
|
||||
Request parameters are:
|
||||
analysis.fieldname - field name whose analyzers are to be used
|
||||
|
||||
analysis.fieldtype - field type whose analyzers are to be used
|
||||
analysis.fieldvalue - text for index-time analysis
|
||||
q (or analysis.q) - text for query time analysis
|
||||
analysis.showmatch (true|false) - When set to true and when
|
||||
query analysis is performed, the produced tokens of the
|
||||
field value analysis will be marked as "matched" for every
|
||||
token that is produced by the query analysis
|
||||
-->
|
||||
<requestHandler name="/analysis/field"
|
||||
startup="lazy"
|
||||
class="solr.FieldAnalysisRequestHandler" />
|
||||
|
||||
|
||||
<!-- Document Analysis Handler
|
||||
|
||||
http://wiki.apache.org/solr/AnalysisRequestHandler
|
||||
|
||||
An analysis handler that provides a breakdown of the analysis
|
||||
process of provided documents. This handler expects a (single)
|
||||
content stream with the following format:
|
||||
|
||||
<docs>
|
||||
<doc>
|
||||
<field name="id">1</field>
|
||||
<field name="name">The Name</field>
|
||||
<field name="text">The Text Value</field>
|
||||
</doc>
|
||||
<doc>...</doc>
|
||||
<doc>...</doc>
|
||||
...
|
||||
</docs>
|
||||
|
||||
Note: Each document must contain a field which serves as the
|
||||
unique key. This key is used in the returned response to associate
|
||||
an analysis breakdown to the analyzed document.
|
||||
|
||||
Like the FieldAnalysisRequestHandler, this handler also supports
|
||||
query analysis by sending either an "analysis.query" or "q"
|
||||
request parameter that holds the query text to be analyzed. It
|
||||
also supports the "analysis.showmatch" parameter which when set to
|
||||
true, all field tokens that match the query tokens will be marked
|
||||
as a "match".
|
||||
-->
|
||||
<requestHandler name="/analysis/document"
|
||||
class="solr.DocumentAnalysisRequestHandler"
|
||||
startup="lazy" />
|
||||
|
||||
<!-- Admin Handlers
|
||||
|
||||
Admin Handlers - This will register all the standard admin
|
||||
RequestHandlers.
|
||||
-->
|
||||
<requestHandler name="/admin/"
|
||||
class="solr.admin.AdminHandlers" />
|
||||
<!-- This single handler is equivalent to the following... -->
|
||||
<!--
|
||||
<requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
|
||||
<requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
|
||||
<requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
|
||||
<requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
|
||||
<requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
|
||||
<requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" >
|
||||
-->
|
||||
<!-- If you wish to hide files under ${solr.home}/conf, explicitly
|
||||
register the ShowFileRequestHandler using:
|
||||
-->
|
||||
<!--
|
||||
<requestHandler name="/admin/file"
|
||||
class="solr.admin.ShowFileRequestHandler" >
|
||||
<lst name="invariants">
|
||||
<str name="hidden">synonyms.txt</str>
|
||||
<str name="hidden">anotherfile.txt</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
-->
|
||||
|
||||
<!-- ping/healthcheck -->
|
||||
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
|
||||
<lst name="invariants">
|
||||
<str name="q">solrpingquery</str>
|
||||
</lst>
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">all</str>
|
||||
</lst>
|
||||
<!-- An optional feature of the PingRequestHandler is to configure the
|
||||
handler with a "healthcheckFile" which can be used to enable/disable
|
||||
the PingRequestHandler.
|
||||
relative paths are resolved against the data dir
|
||||
-->
|
||||
<!-- <str name="healthcheckFile">server-enabled.txt</str> -->
|
||||
</requestHandler>
|
||||
|
||||
<!-- Echo the request contents back to the client -->
|
||||
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="echoHandler">true</str>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Solr Replication
|
||||
|
||||
The SolrReplicationHandler supports replicating indexes from a
|
||||
"master" used for indexing and "slaves" used for queries.
|
||||
|
||||
http://wiki.apache.org/solr/SolrReplication
|
||||
|
||||
It is also necessary for SolrCloud to function (in Cloud mode, the
|
||||
replication handler is used to bulk transfer segments when nodes
|
||||
are added or need to recover).
|
||||
|
||||
https://wiki.apache.org/solr/SolrCloud/
|
||||
-->
|
||||
<requestHandler name="/replication" class="solr.ReplicationHandler" >
|
||||
<!--
|
||||
To enable simple master/slave replication, uncomment one of the
|
||||
sections below, depending on whether this solr instance should be
|
||||
the "master" or a "slave". If this instance is a "slave" you will
|
||||
also need to fill in the masterUrl to point to a real machine.
|
||||
-->
|
||||
<!--
|
||||
<lst name="master">
|
||||
<str name="replicateAfter">commit</str>
|
||||
<str name="replicateAfter">startup</str>
|
||||
<str name="confFiles">schema.xml,stopwords.txt</str>
|
||||
</lst>
|
||||
-->
|
||||
<!--
|
||||
<lst name="slave">
|
||||
<str name="masterUrl">http://your-master-hostname:8983/solr</str>
|
||||
<str name="pollInterval">00:00:60</str>
|
||||
</lst>
|
||||
-->
|
||||
</requestHandler>
|
||||
|
||||
<!-- Search Components
|
||||
|
||||
Search components are registered to SolrCore and used by
|
||||
instances of SearchHandler (which can access them by name)
|
||||
|
||||
By default, the following components are available:
|
||||
|
||||
<searchComponent name="query" class="solr.QueryComponent" />
|
||||
<searchComponent name="facet" class="solr.FacetComponent" />
|
||||
<searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
|
||||
<searchComponent name="highlight" class="solr.HighlightComponent" />
|
||||
<searchComponent name="stats" class="solr.StatsComponent" />
|
||||
<searchComponent name="debug" class="solr.DebugComponent" />
|
||||
|
||||
Default configuration in a requestHandler would look like:
|
||||
|
||||
<arr name="components">
|
||||
<str>query</str>
|
||||
<str>facet</str>
|
||||
<str>mlt</str>
|
||||
<str>highlight</str>
|
||||
<str>stats</str>
|
||||
<str>debug</str>
|
||||
</arr>
|
||||
|
||||
If you register a searchComponent to one of the standard names,
|
||||
that will be used instead of the default.
|
||||
|
||||
To insert components before or after the 'standard' components, use:
|
||||
|
||||
<arr name="first-components">
|
||||
<str>myFirstComponentName</str>
|
||||
</arr>
|
||||
|
||||
<arr name="last-components">
|
||||
<str>myLastComponentName</str>
|
||||
</arr>
|
||||
|
||||
NOTE: The component registered with the name "debug" will
|
||||
always be executed after the "last-components"
|
||||
|
||||
-->
|
||||
|
||||
<!-- Spell Check
|
||||
|
||||
The spell check component can return a list of alternative spelling
|
||||
suggestions.
|
||||
|
||||
http://wiki.apache.org/solr/SpellCheckComponent
|
||||
-->
|
||||
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
||||
|
||||
<str name="queryAnalyzerFieldType">text_general</str>
|
||||
|
||||
<!-- Multiple "Spell Checkers" can be declared and used by this
|
||||
component
|
||||
-->
|
||||
|
||||
<!-- a spellchecker built from a field of the main index -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">text</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||
<str name="distanceMeasure">internal</str>
|
||||
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
|
||||
<float name="accuracy">0.5</float>
|
||||
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
|
||||
<int name="maxEdits">2</int>
|
||||
<!-- the minimum shared prefix when enumerating terms -->
|
||||
<int name="minPrefix">1</int>
|
||||
<!-- maximum number of inspections per result. -->
|
||||
<int name="maxInspections">5</int>
|
||||
<!-- minimum length of a query term to be considered for correction -->
|
||||
<int name="minQueryLength">4</int>
|
||||
<!-- maximum threshold of documents a query term can appear in to be considered for correction -->
|
||||
<float name="maxQueryFrequency">0.01</float>
|
||||
<!-- uncomment this to require suggestions to occur in 1% of the documents
|
||||
<float name="thresholdTokenFrequency">.01</float>
|
||||
-->
|
||||
</lst>
|
||||
|
||||
<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">wordbreak</str>
|
||||
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
||||
<str name="field">name</str>
|
||||
<str name="combineWords">true</str>
|
||||
<str name="breakWords">true</str>
|
||||
<int name="maxChanges">10</int>
|
||||
</lst>
|
||||
|
||||
<!-- a spellchecker that uses a different distance measure -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">jarowinkler</str>
|
||||
<str name="field">spell</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="distanceMeasure">
|
||||
org.apache.lucene.search.spell.JaroWinklerDistance
|
||||
</str>
|
||||
</lst>
|
||||
-->
|
||||
|
||||
<!-- a spellchecker that uses an alternate comparator
|
||||
|
||||
comparatorClass can be one of:
|
||||
1. score (default)
|
||||
2. freq (Frequency first, then score)
|
||||
3. A fully qualified class name
|
||||
-->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="name">freq</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<str name="comparatorClass">freq</str>
</lst>
|
||||
-->
|
||||
|
||||
<!-- A spellchecker that reads the list of words from a file -->
|
||||
<!--
|
||||
<lst name="spellchecker">
|
||||
<str name="classname">solr.FileBasedSpellChecker</str>
|
||||
<str name="name">file</str>
|
||||
<str name="sourceLocation">spellings.txt</str>
|
||||
<str name="characterEncoding">UTF-8</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFile</str>
|
||||
</lst>
|
||||
-->
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the spellcheck component.
|
||||
|
||||
NOTE: This is purely as an example. The whole purpose of the
|
||||
SpellCheckComponent is to hook it into the request handler that
|
||||
handles your normal user queries so that a separate request is
|
||||
not needed to get suggestions.
|
||||
|
||||
IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
|
||||
NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
|
||||
|
||||
See http://wiki.apache.org/solr/SpellCheckComponent for details
|
||||
on the request parameters.
|
||||
-->
|
||||
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="df">_text</str>
|
||||
<!-- Solr will use suggestions from both the 'default' spellchecker
|
||||
and from the 'wordbreak' spellchecker and combine them.
|
||||
collations (re-written queries) can include a combination of
|
||||
corrections from both spellcheckers -->
|
||||
<str name="spellcheck.dictionary">default</str>
|
||||
<str name="spellcheck.dictionary">wordbreak</str>
|
||||
<str name="spellcheck">on</str>
|
||||
<str name="spellcheck.extendedResults">true</str>
|
||||
<str name="spellcheck.count">10</str>
|
||||
<str name="spellcheck.alternativeTermCount">5</str>
|
||||
<str name="spellcheck.maxResultsForSuggest">5</str>
|
||||
<str name="spellcheck.collate">true</str>
|
||||
<str name="spellcheck.collateExtendedResults">true</str>
|
||||
<str name="spellcheck.maxCollationTries">10</str>
|
||||
<str name="spellcheck.maxCollations">5</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Term Vector Component
|
||||
|
||||
http://wiki.apache.org/solr/TermVectorComponent
|
||||
-->
|
||||
<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
|
||||
|
||||
<!-- A request handler for demonstrating the term vector component
|
||||
|
||||
This is purely as an example.
|
||||
|
||||
In reality you will likely want to add the component to your
|
||||
already specified request handlers.
|
||||
-->
|
||||
<requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="df">_text</str>
|
||||
<bool name="tv">true</bool>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>tvComponent</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Clustering Component
|
||||
|
||||
http://wiki.apache.org/solr/ClusteringComponent
|
||||
|
||||
You'll need to set the solr.clustering.enabled system property
|
||||
when running solr to run with clustering enabled:
|
||||
|
||||
java -Dsolr.clustering.enabled=true -jar start.jar
|
||||
|
||||
-->
|
||||
<searchComponent name="clustering"
|
||||
enable="${solr.clustering.enabled:false}"
|
||||
class="solr.clustering.ClusteringComponent" >
|
||||
<!-- Declare an engine -->
|
||||
<lst name="engine">
|
||||
<!-- The name, only one can be named "default" -->
|
||||
<str name="name">default</str>
|
||||
|
||||
<!-- Class name of Carrot2 clustering algorithm.
|
||||
|
||||
Currently available algorithms are:
|
||||
|
||||
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
|
||||
* org.carrot2.clustering.stc.STCClusteringAlgorithm
|
||||
* org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
|
||||
|
||||
See http://project.carrot2.org/algorithms.html for the
|
||||
algorithm's characteristics.
|
||||
-->
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
|
||||
|
||||
<!-- Overriding values for Carrot2 default algorithm attributes.
|
||||
|
||||
For a description of all available attributes, see:
|
||||
http://download.carrot2.org/stable/manual/#chapter.components.
|
||||
Use attribute key as name attribute of str elements
|
||||
below. These can be further overridden for individual
|
||||
requests by specifying attribute key as request parameter
|
||||
name and attribute value as parameter value.
|
||||
-->
|
||||
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
|
||||
|
||||
<!-- Location of Carrot2 lexical resources.
|
||||
|
||||
A directory from which to load Carrot2-specific stop words
|
||||
and stop labels. Absolute or relative to Solr config directory.
|
||||
If a specific resource (e.g. stopwords.en) is present in the
|
||||
specified dir, it will completely override the corresponding
|
||||
default one that ships with Carrot2.
|
||||
|
||||
For an overview of Carrot2 lexical resources, see:
|
||||
http://download.carrot2.org/head/manual/#chapter.lexical-resources
|
||||
-->
|
||||
<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
|
||||
|
||||
<!-- The language to assume for the documents.
|
||||
|
||||
For a list of allowed values, see:
|
||||
http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
|
||||
-->
|
||||
<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">stc</str>
|
||||
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the clustering component
|
||||
|
||||
This is purely as an example.
|
||||
|
||||
In reality you will likely want to add the component to your
|
||||
already specified request handlers.
|
||||
-->
|
||||
<requestHandler name="/clustering"
|
||||
startup="lazy"
|
||||
enable="${solr.clustering.enabled:false}"
|
||||
class="solr.SearchHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="clustering">true</bool>
|
||||
<str name="clustering.engine">default</str>
|
||||
<bool name="clustering.results">true</bool>
|
||||
<!-- The title field -->
|
||||
<str name="carrot.title">name</str>
|
||||
<str name="carrot.url">id</str>
|
||||
<!-- The field to cluster on -->
|
||||
<str name="carrot.snippet">features</str>
|
||||
<!-- produce summaries -->
|
||||
<bool name="carrot.produceSummary">true</bool>
|
||||
<!-- the maximum number of labels per cluster -->
|
||||
<!--<int name="carrot.numDescriptions">5</int>-->
|
||||
<!-- produce sub clusters -->
|
||||
<bool name="carrot.outputSubClusters">false</bool>
|
||||
|
||||
<str name="defType">edismax</str>
|
||||
<str name="qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||
</str>
|
||||
<str name="q.alt">*:*</str>
|
||||
<str name="rows">10</str>
|
||||
<str name="fl">*,score</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>clustering</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Terms Component
|
||||
|
||||
http://wiki.apache.org/solr/TermsComponent
|
||||
|
||||
A component to return terms and document frequency of those
|
||||
terms
|
||||
-->
|
||||
<searchComponent name="terms" class="solr.TermsComponent"/>
|
||||
|
||||
<!-- A request handler for demonstrating the terms component -->
|
||||
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<bool name="terms">true</bool>
|
||||
<bool name="distrib">false</bool>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>terms</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- Query Elevation Component
|
||||
|
||||
http://wiki.apache.org/solr/QueryElevationComponent
|
||||
|
||||
a search component that enables you to configure the top
|
||||
results for a given query regardless of the normal lucene
|
||||
scoring.
|
||||
-->
|
||||
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
|
||||
<!-- pick a fieldType to analyze queries -->
|
||||
<str name="queryFieldType">string</str>
|
||||
<str name="config-file">elevate.xml</str>
|
||||
</searchComponent>
|
||||
|
||||
<!-- A request handler for demonstrating the elevator component -->
|
||||
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="df">text</str>
|
||||
</lst>
|
||||
<arr name="last-components">
|
||||
<str>elevator</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- Highlighting Component
|
||||
|
||||
http://wiki.apache.org/solr/HighlightingParameters
|
||||
-->
|
||||
<searchComponent class="solr.HighlightComponent" name="highlight">
|
||||
<highlighting>
|
||||
<!-- Configure the standard fragmenter -->
|
||||
<!-- This could most likely be commented out in the "default" case -->
|
||||
<fragmenter name="gap"
|
||||
default="true"
|
||||
class="solr.highlight.GapFragmenter">
|
||||
<lst name="defaults">
|
||||
<int name="hl.fragsize">100</int>
|
||||
</lst>
|
||||
</fragmenter>
|
||||
|
||||
<!-- A regular-expression-based fragmenter
|
||||
(for sentence extraction)
|
||||
-->
|
||||
<fragmenter name="regex"
|
||||
class="solr.highlight.RegexFragmenter">
|
||||
<lst name="defaults">
|
||||
<!-- slightly smaller fragsizes work better because of slop -->
|
||||
<int name="hl.fragsize">70</int>
|
||||
<!-- allow 50% slop on fragment sizes -->
|
||||
<float name="hl.regex.slop">0.5</float>
|
||||
<!-- a basic sentence pattern -->
|
||||
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
|
||||
</lst>
|
||||
</fragmenter>
|
||||
|
||||
<!-- Configure the standard formatter -->
|
||||
<formatter name="html"
|
||||
default="true"
|
||||
class="solr.highlight.HtmlFormatter">
|
||||
<lst name="defaults">
|
||||
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
|
||||
<str name="hl.simple.post"><![CDATA[</em>]]></str>
|
||||
</lst>
|
||||
</formatter>
|
||||
|
||||
<!-- Configure the standard encoder -->
|
||||
<encoder name="html"
|
||||
class="solr.highlight.HtmlEncoder" />
|
||||
|
||||
<!-- Configure the standard fragListBuilder -->
|
||||
<fragListBuilder name="simple"
|
||||
class="solr.highlight.SimpleFragListBuilder"/>
|
||||
|
||||
<!-- Configure the single fragListBuilder -->
|
||||
<fragListBuilder name="single"
|
||||
class="solr.highlight.SingleFragListBuilder"/>
|
||||
|
||||
<!-- Configure the weighted fragListBuilder -->
|
||||
<fragListBuilder name="weighted"
|
||||
default="true"
|
||||
class="solr.highlight.WeightedFragListBuilder"/>
|
||||
|
||||
<!-- default tag FragmentsBuilder -->
|
||||
<fragmentsBuilder name="default"
|
||||
default="true"
|
||||
class="solr.highlight.ScoreOrderFragmentsBuilder">
|
||||
<!--
|
||||
<lst name="defaults">
|
||||
<str name="hl.multiValuedSeparatorChar">/</str>
|
||||
</lst>
|
||||
-->
|
||||
</fragmentsBuilder>
|
||||
|
||||
<!-- multi-colored tag FragmentsBuilder -->
|
||||
<fragmentsBuilder name="colored"
|
||||
class="solr.highlight.ScoreOrderFragmentsBuilder">
|
||||
<lst name="defaults">
|
||||
<str name="hl.tag.pre"><![CDATA[
|
||||
<b style="background:yellow">,<b style="background:lawgreen">,
|
||||
<b style="background:aquamarine">,<b style="background:magenta">,
|
||||
<b style="background:palegreen">,<b style="background:coral">,
|
||||
<b style="background:wheat">,<b style="background:khaki">,
|
||||
<b style="background:lime">,<b style="background:deepskyblue">]]></str>
|
||||
<str name="hl.tag.post"><![CDATA[</b>]]></str>
|
||||
</lst>
|
||||
</fragmentsBuilder>
|
||||
|
||||
<boundaryScanner name="default"
|
||||
default="true"
|
||||
class="solr.highlight.SimpleBoundaryScanner">
|
||||
<lst name="defaults">
|
||||
<str name="hl.bs.maxScan">10</str>
|
||||
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
|
||||
</lst>
|
||||
</boundaryScanner>
|
||||
|
||||
<boundaryScanner name="breakIterator"
|
||||
class="solr.highlight.BreakIteratorBoundaryScanner">
|
||||
<lst name="defaults">
|
||||
<!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
|
||||
<str name="hl.bs.type">WORD</str>
|
||||
<!-- language and country are used when constructing Locale object. -->
|
||||
<!-- And the Locale object will be used when getting instance of BreakIterator -->
|
||||
<str name="hl.bs.language">en</str>
|
||||
<str name="hl.bs.country">US</str>
|
||||
</lst>
|
||||
</boundaryScanner>
|
||||
</highlighting>
|
||||
</searchComponent>
|
||||
|
||||
<!-- Update Processors
|
||||
|
||||
Chains of Update Processor Factories for dealing with Update
|
||||
Requests can be declared, and then used by name in Update
|
||||
Request Processors
|
||||
|
||||
http://wiki.apache.org/solr/UpdateRequestProcessor
|
||||
|
||||
-->
|
||||
<!-- Deduplication
|
||||
|
||||
An example dedup update processor that creates the "id" field
|
||||
on the fly based on the hash code of some other fields. This
|
||||
example has overwriteDupes set to false since we are using the
|
||||
id field as the signatureField and Solr will maintain
|
||||
uniqueness based on that anyway.
|
||||
|
||||
-->
|
||||
<!--
|
||||
<updateRequestProcessorChain name="dedupe">
|
||||
<processor class="solr.processor.SignatureUpdateProcessorFactory">
|
||||
<bool name="enabled">true</bool>
|
||||
<str name="signatureField">id</str>
|
||||
<bool name="overwriteDupes">false</bool>
|
||||
<str name="fields">name,features,cat</str>
|
||||
<str name="signatureClass">solr.processor.Lookup3Signature</str>
|
||||
</processor>
|
||||
<processor class="solr.LogUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
-->
|
||||
|
||||
<!-- Language identification
|
||||
|
||||
This example update chain identifies the language of the incoming
|
||||
documents using the langid contrib. The detected language is
|
||||
written to field language_s. No field name mapping is done.
|
||||
The fields used for detection are text, title, subject and description,
|
||||
making this example suitable for detecting languages from full-text
|
||||
rich documents injected via ExtractingRequestHandler.
|
||||
See more about langId at http://wiki.apache.org/solr/LanguageDetection
|
||||
-->
|
||||
<!--
|
||||
<updateRequestProcessorChain name="langid">
|
||||
<processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
|
||||
<str name="langid.fl">text,title,subject,description</str>
|
||||
<str name="langid.langField">language_s</str>
|
||||
<str name="langid.fallback">en</str>
|
||||
</processor>
|
||||
<processor class="solr.LogUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
-->
|
||||
|
||||
<!-- Script update processor
|
||||
|
||||
This example hooks in an update processor implemented using JavaScript.
|
||||
|
||||
See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
|
||||
-->
|
||||
<!--
|
||||
<updateRequestProcessorChain name="script">
|
||||
<processor class="solr.StatelessScriptUpdateProcessorFactory">
|
||||
<str name="script">update-script.js</str>
|
||||
<lst name="params">
|
||||
<str name="config_param">example config parameter</str>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
-->
|
||||
|
||||
<!-- Response Writers
|
||||
|
||||
http://wiki.apache.org/solr/QueryResponseWriter
|
||||
|
||||
Request responses will be written using the writer specified by
|
||||
the 'wt' request parameter matching the name of a registered
|
||||
writer.
|
||||
|
||||
The "default" writer is the default and will be used if 'wt' is
|
||||
not specified in the request.
|
||||
-->
|
||||
<!-- The following response writers are implicitly configured unless
|
||||
overridden...
|
||||
-->
|
||||
<!--
|
||||
<queryResponseWriter name="xml"
|
||||
default="true"
|
||||
class="solr.XMLResponseWriter" />
|
||||
<queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
|
||||
<queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
|
||||
<queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
|
||||
<queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
|
||||
<queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
|
||||
<queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
|
||||
<queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
|
||||
-->
|
||||
|
||||
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
|
||||
<!-- For the purposes of the tutorial, JSON responses are written as
|
||||
plain text so that they are easy to read in *any* browser.
|
||||
If you expect a MIME type of "application/json" just remove this override.
|
||||
-->
|
||||
<str name="content-type">text/plain; charset=UTF-8</str>
|
||||
</queryResponseWriter>
|
||||
|
||||
<!--
|
||||
Custom response writers can be declared as needed...
|
||||
-->
|
||||
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
|
||||
|
||||
|
||||
<!-- XSLT response writer transforms the XML output by any xslt file found
|
||||
in Solr's conf/xslt directory. Changes to xslt files are checked for
|
||||
every xsltCacheLifetimeSeconds.
|
||||
-->
|
||||
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
|
||||
<int name="xsltCacheLifetimeSeconds">5</int>
|
||||
</queryResponseWriter>
|
||||
|
||||
<!-- Query Parsers
|
||||
|
||||
http://wiki.apache.org/solr/SolrQuerySyntax
|
||||
|
||||
Multiple QParserPlugins can be registered by name, and then
|
||||
used in either the "defType" param for the QueryComponent (used
|
||||
by SearchHandler) or in LocalParams
|
||||
-->
|
||||
<!-- example of registering a query parser -->
|
||||
<!--
|
||||
<queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
|
||||
-->
|
||||
|
||||
<!-- Function Parsers
|
||||
|
||||
http://wiki.apache.org/solr/FunctionQuery
|
||||
|
||||
Multiple ValueSourceParsers can be registered by name, and then
|
||||
used as function names when using the "func" QParser.
|
||||
-->
|
||||
<!-- example of registering a custom function parser -->
|
||||
<!--
|
||||
<valueSourceParser name="myfunc"
|
||||
class="com.mycompany.MyValueSourceParser" />
|
||||
-->
|
||||
|
||||
|
||||
<!-- Document Transformers
|
||||
http://wiki.apache.org/solr/DocTransformers
|
||||
-->
|
||||
<!--
|
||||
Could be something like:
|
||||
<transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
|
||||
<int name="connection">jdbc://....</int>
|
||||
</transformer>
|
||||
|
||||
To add a constant value to all docs, use:
|
||||
<transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
|
||||
<int name="value">5</int>
|
||||
</transformer>
|
||||
|
||||
If you want the user to still be able to change it with _value:something_ use this:
|
||||
<transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
|
||||
<double name="defaultValue">5</double>
|
||||
</transformer>
|
||||
|
||||
If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
|
||||
EditorialMarkerFactory will do exactly that:
|
||||
<transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
|
||||
-->
|
||||
|
||||
|
||||
<!-- Legacy config for the admin interface -->
|
||||
<admin>
|
||||
<defaultQuery>*:*</defaultQuery>
|
||||
</admin>
|
||||
|
||||
</config>
|
58
conf/solr/4/extras/stopwords.txt
Normal file
58
conf/solr/4/extras/stopwords.txt
Normal file
@ -0,0 +1,58 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# a couple of test stopwords to test that the words are really being
|
||||
# configured from this file:
|
||||
stopworda
|
||||
stopwordb
|
||||
|
||||
#Standard english stop words taken from Lucene's StopAnalyzer
|
||||
a
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
for
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
no
|
||||
not
|
||||
of
|
||||
on
|
||||
or
|
||||
s
|
||||
such
|
||||
t
|
||||
that
|
||||
the
|
||||
their
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
to
|
||||
was
|
||||
will
|
||||
with
|
||||
|
29
conf/solr/4/extras/synonyms.txt
Normal file
29
conf/solr/4/extras/synonyms.txt
Normal file
@ -0,0 +1,29 @@
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
#some test synonym mappings unlikely to appear in real input text
|
||||
aaafoo => aaabar
|
||||
bbbfoo => bbbfoo bbbbar
|
||||
cccfoo => cccbar cccbaz
|
||||
fooaaa,baraaa,bazaaa
|
||||
|
||||
# Some synonym groups specific to this example
|
||||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
pixima => pixma
|
||||
|
68
conf/solr/4/templates/schema.ss
Normal file
68
conf/solr/4/templates/schema.ss
Normal file
@ -0,0 +1,68 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is the Solr schema file. This file should be named "schema.xml" and
|
||||
should be in the conf directory under the solr home
|
||||
(i.e. ./solr/conf/schema.xml by default)
|
||||
or located where the classloader for the Solr webapp can find it.
|
||||
|
||||
This example schema is the recommended starting point for users.
|
||||
It should be kept correct and concise, usable out-of-the-box.
|
||||
|
||||
For more information, on how to customize this file, please see
|
||||
http://wiki.apache.org/solr/SchemaXml
|
||||
|
||||
PERFORMANCE NOTE: this schema includes many optional features and should not
|
||||
be used for benchmarking. To improve performance one could
|
||||
- set stored="false" for all fields possible (esp large fields) when you
|
||||
only need to search on the field but don't need to return the original
|
||||
value.
|
||||
- set indexed="false" if you don't need to search on the field, but only
|
||||
return the field as a result of searching on other indexed fields.
|
||||
- remove all unneeded copyField statements
|
||||
- for best index size and searching performance, set "indexed" to false
|
||||
for all general text fields, use copyField to copy them to the
|
||||
catchall "text" field, and use that for searching.
|
||||
- For maximum indexing performance, use the StreamingUpdateSolrServer
|
||||
java client.
|
||||
- Remember to run the JVM in server mode, and use a higher logging level
|
||||
that avoids logging every request
|
||||
-->
|
||||
|
||||
<schema name="$IndexName" version="1.5">
|
||||
|
||||
<types>
|
||||
$Types
|
||||
</types>
|
||||
|
||||
<fields>
|
||||
$FieldDefinitions
|
||||
|
||||
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
|
||||
</fields>
|
||||
|
||||
$CopyFieldDefinitions
|
||||
|
||||
<uniqueKey>_documentid</uniqueKey>
|
||||
|
||||
<defaultSearchField>_text</defaultSearchField>
|
||||
|
||||
<solrQueryParser defaultOperator="OR"/>
|
||||
|
||||
</schema>
|
360
conf/solr/4/templates/types.ss
Normal file
360
conf/solr/4/templates/types.ss
Normal file
@ -0,0 +1,360 @@
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||
<!--Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
||||
<fieldtype name="binary" class="solr.BinaryField"/>
|
||||
|
||||
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
||||
currently supported on types that are sorted internally as strings.
|
||||
This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
||||
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||
without the field to come after documents with the field,
|
||||
regardless of the requested sort order (asc or desc).
|
||||
- If sortMissingFirst="true", then a sort on this field will cause documents
|
||||
without the field to come before documents with the field,
|
||||
regardless of the requested sort order.
|
||||
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||
then default lucene sorting will be used which places docs without the
|
||||
field first in an ascending sort and last in a descending sort.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
|
||||
-->
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!--
|
||||
Numeric field types that index each value at various levels of precision
|
||||
to accelerate range queries when the number of values between the range
|
||||
endpoints is large. See the javadoc for NumericRangeQuery for internal
|
||||
implementation details.
|
||||
|
||||
Smaller precisionStep values (specified in bits) will lead to more tokens
|
||||
indexed per value, slightly larger index size, and faster range queries.
|
||||
A precisionStep of 0 disables indexing at different precision levels.
|
||||
-->
|
||||
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||
is a more restricted form of the canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||
The trailing "Z" designates UTC time and is mandatory.
|
||||
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||
All other components are mandatory.
|
||||
|
||||
Expressions can also be used to denote calculations that should be
|
||||
performed relative to "NOW" to determine the value, ie...
|
||||
|
||||
NOW/HOUR
|
||||
... Round to the start of the current hour
|
||||
NOW-1DAY
|
||||
... Exactly 1 day prior to now
|
||||
NOW/DAY+6MONTHS+3DAYS
|
||||
... 6 months and 3 days in the future from the start of
|
||||
the current day
|
||||
|
||||
Consult the DateField javadocs for more information.
|
||||
|
||||
Note: For faster range queries, consider the tdate type
|
||||
-->
|
||||
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
|
||||
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
||||
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
||||
|
||||
<!-- The "RandomSortField" is not used to store or search any
|
||||
data. You can declare fields of this type in your schema
|
||||
to generate pseudo-random orderings of your docs for sorting
|
||||
purposes. The ordering is generated based on the field name
|
||||
and the version of the index. As long as the index version
|
||||
remains unchanged, and the same field name is reused,
|
||||
the ordering of the docs will be consistent.
|
||||
If you want different pseudo-random orderings of documents,
|
||||
for the same version of the index, use a dynamicField and
|
||||
change the name
|
||||
-->
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||
|
||||
<!-- solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying.
|
||||
|
||||
The optional positionIncrementGap puts space between multiple fields of
|
||||
this type on the same document, with the purpose of preventing false phrase
|
||||
matching across fields.
|
||||
|
||||
For more info on customizing your analyzer chain, please see
|
||||
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||
-->
|
||||
|
||||
<!-- One can also specify an existing Analyzer class that has a
|
||||
default constructor via the class attribute on the analyzer element
|
||||
<fieldType name="text_greek" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||
</fieldType>
|
||||
-->
|
||||
|
||||
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
|
||||
form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
|
||||
to generate text:"pdp 11" rather than (text:PDP OR text:11).
|
||||
NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!-- Case insensitive stop word removal.
|
||||
add enablePositionIncrements=true in both the index and query
|
||||
analyzers to leave a 'gap' for more accurate phrase queries.
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A copy of text that has the HTMLStripCharFilterFactory as the first index analyzer, so that html can be provided -->
|
||||
<fieldType name="htmltext" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjunction with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Text optimized for spelling corrections, with minimal alterations (e.g. no stemming) -->
|
||||
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.LengthFilterFactory" min="4" max="20" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
||||
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- A general unstemmed text field that indexes tokens normally and also
|
||||
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
||||
leading wildcard queries. -->
|
||||
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- charFilter + WhitespaceTokenizer -->
|
||||
<!--
|
||||
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
|
||||
<analyzer>
|
||||
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
-->
|
||||
|
||||
<!-- This is an example of using the KeywordTokenizer along
|
||||
with various TokenFilterFactories to produce a sortable field
|
||||
that does not include some properties of the source text
|
||||
-->
|
||||
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
||||
<analyzer>
|
||||
<!-- KeywordTokenizer does no actual tokenizing, so the entire
|
||||
input string is preserved as a single token
|
||||
-->
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<!-- The LowerCase TokenFilter does what you expect, which can be useful
|
||||
when you want your sorting to be case insensitive
|
||||
-->
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<!-- The TrimFilter removes any leading or trailing whitespace -->
|
||||
<filter class="solr.TrimFilterFactory" />
|
||||
<!-- The PatternReplaceFilter gives you the flexibility to use
|
||||
Java Regular expression to replace any sequence of characters
|
||||
matching a pattern with an arbitrary replacement string,
|
||||
which may include back references to portions of the original
|
||||
string matched by the pattern.
|
||||
|
||||
See the Java Regular Expression documentation for more
|
||||
information on pattern and replacement string syntax.
|
||||
|
||||
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
|
||||
-->
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-z])" replacement="" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!--
|
||||
The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
|
||||
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
||||
Attributes of the DelimitedPayloadTokenFilterFactory :
|
||||
"delimiter" - a one character delimiter. Default is | (pipe)
|
||||
"encoder" - how to encode the following value into a payload
|
||||
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
||||
integer -> o.a.l.a.p.IntegerEncoder
|
||||
identity -> o.a.l.a.p.IdentityEncoder
|
||||
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
||||
-->
|
||||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- lowercases the entire field value, keeping it as a single token. -->
|
||||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright. -->
|
||||
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
||||
|
||||
<!-- This point type indexes the coordinates as separate fields (subFields)
|
||||
If subFieldType is defined, it references a type, and a dynamic field
|
||||
definition is created matching *___<typename>. Alternately, if
|
||||
subFieldSuffix is defined, that is used to create the subFields.
|
||||
Example: if subFieldType="double", then the coordinates would be
|
||||
indexed in fields myloc_0___double,myloc_1___double.
|
||||
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
||||
in fields myloc_0_d,myloc_1_d
|
||||
The subFields are an implementation detail of the fieldType, and end
|
||||
users normally should not need to know about them.
|
||||
-->
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
|
||||
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
|
||||
<!--
|
||||
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
||||
See http://wiki.apache.org/solr/SpatialSearch
|
||||
-->
|
||||
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
@ -103,11 +103,11 @@ class SolrIndexTest extends SapphireTest {
|
||||
}
|
||||
|
||||
protected function getServiceMock() {
|
||||
return Phockito::mock('SolrService');
|
||||
return Phockito::mock('Solr3Service');
|
||||
}
|
||||
|
||||
protected function getServiceSpy() {
|
||||
$serviceSpy = Phockito::spy('SolrService');
|
||||
$serviceSpy = Phockito::spy('Solr3Service');
|
||||
Phockito::when($serviceSpy)->_sendRawPost()->return($this->getFakeRawSolrResponse());
|
||||
|
||||
return $serviceSpy;
|
||||
|
Loading…
Reference in New Issue
Block a user