From 9834b94f97e4373c06b9c517fd36fee444f4976d Mon Sep 17 00:00:00 2001
From: Scott Hutchinson <scott@silverstripe.com>
Date: Tue, 23 Oct 2018 18:30:34 +1300
Subject: [PATCH] Enable macrons in search by default

---
 conf/solr/4/templates/types.ss       | 26 ++++++++++++---------
 docs/en/05_advanced_configuration.md | 34 ++++++++++++++--------------
 2 files changed, 32 insertions(+), 28 deletions(-)
diff --git a/conf/solr/4/templates/types.ss b/conf/solr/4/templates/types.ss
index 974ee2d..85ab507 100644
--- a/conf/solr/4/templates/types.ss
+++ b/conf/solr/4/templates/types.ss
@@ -8,7 +8,7 @@
 
     <!-- The optional sortMissingLast and sortMissingFirst attributes are
          currently supported on types that are sorted internally as strings.
-	       This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
+         This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
        - If sortMissingLast="true", then a sort on this field will cause documents
          without the field to come after documents with the field,
          regardless of the requested sort order (asc or desc).
@@ -136,9 +136,11 @@
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
         <filter class="solr.KeywordRepeatFilterFactory"/>
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
@@ -162,9 +164,11 @@
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
         <filter class="solr.KeywordRepeatFilterFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -210,8 +214,8 @@
             <tokenizer class="solr.StandardTokenizerFactory" />
             <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
             <filter class="solr.LengthFilterFactory" min="4" max="20" />
-            <filter class="solr.LowerCaseFilterFactory" /> 
-            <filter class="solr.RemoveDuplicatesTokenFilterFactory" /> 
+            <filter class="solr.LowerCaseFilterFactory" />
+            <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
         </analyzer>
     </fieldType>
 
@@ -239,7 +243,7 @@
 
     <!-- A general unstemmed text field that indexes tokens normally and also
          reversed (via ReversedWildcardFilterFactory), to enable more efficient
-	 leading wildcard queries. -->
+         leading wildcard queries. -->
     <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -320,10 +324,10 @@
         a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
         Attributes of the DelimitedPayloadTokenFilterFactory :
          "delimiter" - a one character delimiter. Default is | (pipe)
-	 "encoder" - how to encode the following value into a playload
-	    float -> org.apache.lucene.analysis.payloads.FloatEncoder,
-	    integer -> o.a.l.a.p.IntegerEncoder
-	    identity -> o.a.l.a.p.IdentityEncoder
+         "encoder" - how to encode the following value into a payload
+            float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+            integer -> o.a.l.a.p.IntegerEncoder
+            identity -> o.a.l.a.p.IdentityEncoder
             Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
          -->
         <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
@@ -364,8 +368,8 @@
     <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
     <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
 
-   <!--
+    <!--
     A Geohash is a compact representation of a latitude longitude pair in a single field.
     See http://wiki.apache.org/solr/SpatialSearch
-   -->
-    <fieldtype name="geohash" class="solr.GeoHashField"/>
\ No newline at end of file
+    -->
+    <fieldtype name="geohash" class="solr.GeoHashField"/>
diff --git a/docs/en/05_advanced_configuration.md b/docs/en/05_advanced_configuration.md
index 942af4c..19ffe8e 100644
--- a/docs/en/05_advanced_configuration.md
+++ b/docs/en/05_advanced_configuration.md
@@ -11,7 +11,7 @@ use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
 $index = MyIndex::singleton();
 $query = SearchQuery::create()
     ->addSearchTerm('My Term');
-$params = [    
+$params = [
     'facet' => 'true',
     'facet.field' => 'SiteTree_ClassName',
 ];
@@ -159,7 +159,7 @@ substituted, which will include the original token.
 
 * Two comma-separated lists of words with the symbol "=>" between them. If the token matches any word on
 the left, then the list on the right is substituted. The original token will not be included unless it is also in the
-list on the right. 
+list on the right.
 
 For example:
 
@@ -207,7 +207,7 @@ $results = $index->search($query, -1, -1, $params);
 $results->spellcheck;
 ```
 
-The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by 
+The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by
 stemming filters which butcher words. So misspelling "Govnernance" will suggest "govern" rather than "Governance".
 This can be fixed by aggregating spell checking data in a separate field.
 
@@ -289,22 +289,22 @@ Each result will automatically contain an `Excerpt` property which you can use i
 to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
 
 ## Boosting/Weighting
- 
+
  Results aren't all created equal. Matches in some fields are more important than others; for example, a page `Title` might be considered more relevant to the user than terms in the `Content` field.
- 
+
  To account for this, a "weighting" (or "boosting") factor can be applied to each searched field. The default value is `1.0`, anything below that will decrease the relevance, anything above increases it. You can get more information on relevancy at the [Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ).
- 
+
 You can manage the boosting in two ways:
- 
+
 ### Boosting on query
- 
+
  To adjust the relative values at the time of querying, pass them in as the third argument to your `addSearchTerm()` call:
- 
+
  ```php
  use My\Namespace\Index\MyIndex;
  use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
  use Page;
- 
+
  $query = SearchQuery::create()
      ->addSearchTerm(
          'fire',
@@ -317,9 +317,9 @@ You can manage the boosting in two ways:
      );
  $results = MyIndex::singleton()->search($query);
  ```
- 
+
  This will ensure that `Title` is given higher priority for matches than `Content`, which is well above `SecretParagraph`.
- 
+
 ### Boosting on index
 
 Boost values for specific can also be specified directly on the `SolrIndex` class directly.
@@ -357,14 +357,14 @@ class SolrSearchIndex extends SolrIndex
 
 ## Indexing related objects
 
-To add a related object to your index. 
+To add a related object to your index.
 
 ## Subsites
 
 When you are utilising the [subsites module](https://github.com/silverstripe/silverstripe-subsites) you
 may want to add [boosting](#boosting/weighting) to results from the current subsite. To do so, you'll
 need to use [eDisMax](https://lucene.apache.org/solr/guide/6_6/the-extended-dismax-query-parser.html)
-and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex` 
+and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex`
 extension:
 
 ```php
@@ -385,7 +385,7 @@ public function search(SearchQuery $query, $offset = -1, $limit = -1, $params =
 ## Custom field types
 
 Solr supports custom field type definitions which are written to its XML schema. Many standard ones are already included
- in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template 
+ in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template
  responsible for it: `types.ss`.
 
 In the following example, we read our type definitions from a new file `mysite/solr/templates/types.ss` instead:
@@ -428,7 +428,7 @@ To allow searches on words containing numeric tokens, you'll need to change the
 
 The `ASCIIFoldingFilterFactory` filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 127 ASCII characters) to their ASCII equivalents, if one exists.
 
-Find the fields in your overloaded `types.ss` that you want to enable this behaviour in, for example inside the `<fieldType name="htmltext">` block, add the following to both its index analyzer and query analyzer records.
+By default, this functionality is enabled on the `htmltext` and `text` fieldTypes. To enable it for any other fieldType, find that type in your overloaded `types.ss` (for example, the `<fieldType name="textTight">` block) and add the following to both its index analyzer and query analyzer records.
 
 ```xml
 <filter class="solr.ASCIIFoldingFilterFactory"/>
@@ -436,7 +436,7 @@ Find the fields in your overloaded `types.ss` that you want to enable this behav
 
 ## Text extraction
 
-Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through 
+Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through
 the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler).
 If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the
 Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following