Enable macrons in search by default
This commit is contained in:
parent
e4fd283d11
commit
9834b94f97
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
||||||
currently supported on types that are sorted internally as strings.
|
currently supported on types that are sorted internally as strings.
|
||||||
This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
|
||||||
- If sortMissingLast="true", then a sort on this field will cause documents
|
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||||
without the field to come after documents with the field,
|
without the field to come after documents with the field,
|
||||||
regardless of the requested sort order (asc or desc).
|
regardless of the requested sort order (asc or desc).
|
||||||
|
@ -136,9 +136,11 @@
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<analyzer type="query">
|
<analyzer type="query">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
<filter class="solr.KeywordRepeatFilterFactory"/>
|
<filter class="solr.KeywordRepeatFilterFactory"/>
|
||||||
<filter class="solr.StopFilterFactory"
|
<filter class="solr.StopFilterFactory"
|
||||||
ignoreCase="true"
|
ignoreCase="true"
|
||||||
|
@ -162,9 +164,11 @@
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<analyzer type="query">
|
<analyzer type="query">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
<filter class="solr.KeywordRepeatFilterFactory"/>
|
<filter class="solr.KeywordRepeatFilterFactory"/>
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
@ -210,8 +214,8 @@
|
||||||
<tokenizer class="solr.StandardTokenizerFactory" />
|
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
<filter class="solr.LengthFilterFactory" min="4" max="20" />
|
<filter class="solr.LengthFilterFactory" min="4" max="20" />
|
||||||
<filter class="solr.LowerCaseFilterFactory" />
|
<filter class="solr.LowerCaseFilterFactory" />
|
||||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldType>
|
</fieldType>
|
||||||
|
|
||||||
|
@ -239,7 +243,7 @@
|
||||||
|
|
||||||
<!-- A general unstemmed text field that indexes tokens normally and also
|
<!-- A general unstemmed text field that indexes tokens normally and also
|
||||||
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
reversed (via ReversedWildcardFilterFactory), to enable more efficient
|
||||||
leading wildcard queries. -->
|
leading wildcard queries. -->
|
||||||
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
||||||
<analyzer type="index">
|
<analyzer type="index">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
@ -320,10 +324,10 @@
|
||||||
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
|
||||||
Attributes of the DelimitedPayloadTokenFilterFactory :
|
Attributes of the DelimitedPayloadTokenFilterFactory :
|
||||||
"delimiter" - a one character delimiter. Default is | (pipe)
|
"delimiter" - a one character delimiter. Default is | (pipe)
|
||||||
"encoder" - how to encode the following value into a payload
|
"encoder" - how to encode the following value into a payload
|
||||||
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
|
||||||
integer -> o.a.l.a.p.IntegerEncoder
|
integer -> o.a.l.a.p.IntegerEncoder
|
||||||
identity -> o.a.l.a.p.IdentityEncoder
|
identity -> o.a.l.a.p.IdentityEncoder
|
||||||
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
|
||||||
-->
|
-->
|
||||||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
|
||||||
|
@ -364,8 +368,8 @@
|
||||||
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
||||||
See http://wiki.apache.org/solr/SpatialSearch
|
See http://wiki.apache.org/solr/SpatialSearch
|
||||||
-->
|
-->
|
||||||
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
||||||
|
|
|
@ -11,7 +11,7 @@ use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
|
||||||
$index = MyIndex::singleton();
|
$index = MyIndex::singleton();
|
||||||
$query = SearchQuery::create()
|
$query = SearchQuery::create()
|
||||||
->addSearchTerm('My Term');
|
->addSearchTerm('My Term');
|
||||||
$params = [
|
$params = [
|
||||||
'facet' => 'true',
|
'facet' => 'true',
|
||||||
'facet.field' => 'SiteTree_ClassName',
|
'facet.field' => 'SiteTree_ClassName',
|
||||||
];
|
];
|
||||||
|
@ -159,7 +159,7 @@ substituted, which will include the original token.
|
||||||
|
|
||||||
* Two comma-separated lists of words with the symbol "=>" between them. If the token matches any word on
|
* Two comma-separated lists of words with the symbol "=>" between them. If the token matches any word on
|
||||||
the left, then the list on the right is substituted. The original token will not be included unless it is also in the
|
the left, then the list on the right is substituted. The original token will not be included unless it is also in the
|
||||||
list on the right.
|
list on the right.
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
|
@ -207,7 +207,7 @@ $results = $index->search($query, -1, -1, $params);
|
||||||
$results->spellcheck;
|
$results->spellcheck;
|
||||||
```
|
```
|
||||||
|
|
||||||
The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by
|
The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by
|
||||||
stemming filters which butcher words. So misspelling "Govnernance" will suggest "govern" rather than "Governance".
|
stemming filters which butcher words. So misspelling "Govnernance" will suggest "govern" rather than "Governance".
|
||||||
This can be fixed by aggregating spell checking data in a separate field.
|
This can be fixed by aggregating spell checking data in a separate field.
|
||||||
|
|
||||||
|
@ -289,22 +289,22 @@ Each result will automatically contain an `Excerpt` property which you can use i
|
||||||
to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
|
to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
|
||||||
|
|
||||||
## Boosting/Weighting
|
## Boosting/Weighting
|
||||||
|
|
||||||
Results aren't all created equal. Matches in some fields are more important than others; for example, a page `Title` might be considered more relevant to the user than terms in the `Content` field.
|
Results aren't all created equal. Matches in some fields are more important than others; for example, a page `Title` might be considered more relevant to the user than terms in the `Content` field.
|
||||||
|
|
||||||
To account for this, a "weighting" (or "boosting") factor can be applied to each searched field. The default value is `1.0`; anything below that decreases the relevance, and anything above increases it. You can get more information on relevancy at the [Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ).
|
To account for this, a "weighting" (or "boosting") factor can be applied to each searched field. The default value is `1.0`; anything below that decreases the relevance, and anything above increases it. You can get more information on relevancy at the [Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ).
|
||||||
|
|
||||||
You can manage the boosting in two ways:
|
You can manage the boosting in two ways:
|
||||||
|
|
||||||
### Boosting on query
|
### Boosting on query
|
||||||
|
|
||||||
To adjust the relative values at the time of querying, pass them in as the third argument to your `addSearchTerm()` call:
|
To adjust the relative values at the time of querying, pass them in as the third argument to your `addSearchTerm()` call:
|
||||||
|
|
||||||
```php
|
```php
|
||||||
use My\Namespace\Index\MyIndex;
|
use My\Namespace\Index\MyIndex;
|
||||||
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
|
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
|
||||||
use Page;
|
use Page;
|
||||||
|
|
||||||
$query = SearchQuery::create()
|
$query = SearchQuery::create()
|
||||||
->addSearchTerm(
|
->addSearchTerm(
|
||||||
'fire',
|
'fire',
|
||||||
|
@ -317,9 +317,9 @@ You can manage the boosting in two ways:
|
||||||
);
|
);
|
||||||
$results = MyIndex::singleton()->search($query);
|
$results = MyIndex::singleton()->search($query);
|
||||||
```
|
```
|
||||||
|
|
||||||
This will ensure that `Title` is given higher priority for matches than `Content`, which is well above `SecretParagraph`.
|
This will ensure that `Title` is given higher priority for matches than `Content`, which is well above `SecretParagraph`.
|
||||||
|
|
||||||
### Boosting on index
|
### Boosting on index
|
||||||
|
|
||||||
Boost values for specific fields can also be specified directly on the `SolrIndex` class.
|
Boost values for specific fields can also be specified directly on the `SolrIndex` class.
|
||||||
|
@ -357,14 +357,14 @@ class SolrSearchIndex extends SolrIndex
|
||||||
|
|
||||||
## Indexing related objects
|
## Indexing related objects
|
||||||
|
|
||||||
To add a related object to your index.
|
To add a related object to your index.
|
||||||
|
|
||||||
## Subsites
|
## Subsites
|
||||||
|
|
||||||
When you are utilising the [subsites module](https://github.com/silverstripe/silverstripe-subsites) you
|
When you are utilising the [subsites module](https://github.com/silverstripe/silverstripe-subsites) you
|
||||||
may want to add [boosting](#boostingweighting) to results from the current subsite. To do so, you'll
|
may want to add [boosting](#boostingweighting) to results from the current subsite. To do so, you'll
|
||||||
need to use [eDisMax](https://lucene.apache.org/solr/guide/6_6/the-extended-dismax-query-parser.html)
|
need to use [eDisMax](https://lucene.apache.org/solr/guide/6_6/the-extended-dismax-query-parser.html)
|
||||||
and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex`
|
and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex`
|
||||||
extension:
|
extension:
|
||||||
|
|
||||||
```php
|
```php
|
||||||
|
@ -385,7 +385,7 @@ public function search(SearchQuery $query, $offset = -1, $limit = -1, $params =
|
||||||
## Custom field types
|
## Custom field types
|
||||||
|
|
||||||
Solr supports custom field type definitions which are written to its XML schema. Many standard ones are already included
|
Solr supports custom field type definitions which are written to its XML schema. Many standard ones are already included
|
||||||
in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template
|
in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template
|
||||||
responsible for it: `types.ss`.
|
responsible for it: `types.ss`.
|
||||||
|
|
||||||
In the following example, we read our type definitions from a new file `mysite/solr/templates/types.ss` instead:
|
In the following example, we read our type definitions from a new file `mysite/solr/templates/types.ss` instead:
|
||||||
|
@ -428,7 +428,7 @@ To allow searches on words containing numeric tokens, you'll need to change the
|
||||||
|
|
||||||
The `ASCIIFoldingFilterFactory` filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 128 Unicode characters, which correspond to ASCII) to their ASCII equivalents, if one exists.
|
The `ASCIIFoldingFilterFactory` filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 128 Unicode characters, which correspond to ASCII) to their ASCII equivalents, if one exists.
|
||||||
|
|
||||||
Find the fields in your overloaded `types.ss` that you want to enable this behaviour in, for example inside the `<fieldType name="htmltext">` block, add the following to both its index analyzer and query analyzer records.
|
By default, this functionality is enabled on the `htmltext` and `text` fieldTypes. To enable it for any other fieldType, find that type in your overloaded `types.ss` (for example, the `<fieldType name="textTight">` block) and add the following to both its index analyzer and query analyzer records.
|
||||||
|
|
||||||
```xml
|
```xml
|
||||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
@ -436,7 +436,7 @@ Find the fields in your overloaded `types.ss` that you want to enable this behav
|
||||||
|
|
||||||
## Text extraction
|
## Text extraction
|
||||||
|
|
||||||
Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through
|
Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through
|
||||||
the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler)).
|
the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler)).
|
||||||
If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the
|
If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the
|
||||||
Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following
|
Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following
|
||||||
|
|
Loading…
Reference in New Issue