diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php index e023b02..4c04d1e 100644 --- a/code/solr/SolrIndex.php +++ b/code/solr/SolrIndex.php @@ -284,6 +284,10 @@ abstract class SolrIndex extends SearchIndex { $ret['Matches']->setPageStart($offset); // Results per page $ret['Matches']->setPageLength($limit); + // Suggestions (requires custom setup, assumes spellcheck.collate=true) + if(isset($res->spellcheck->suggestions->collation)) { + $ret['Suggestion'] = $res->spellcheck->suggestions->collation; + } return new ArrayData($ret); } diff --git a/conf/extras/solrconfig.xml b/conf/extras/solrconfig.xml index 61e188a..6bc196d 100644 --- a/conf/extras/solrconfig.xml +++ b/conf/extras/solrconfig.xml @@ -761,6 +761,11 @@ nameOfCustomComponent2 --> + + + spellcheck + + - + default - name - spellchecker + _text + ./spellchecker + solr.IndexBasedSpellChecker + true diff --git a/conf/templates/types.ss b/conf/templates/types.ss index 4b1a1f3..135c8ac 100644 --- a/conf/templates/types.ss +++ b/conf/templates/types.ss @@ -192,6 +192,17 @@ + + + + + + + + + + + diff --git a/docs/Solr.md b/docs/Solr.md index 08b456e..c0864ea 100644 --- a/docs/Solr.md +++ b/docs/Solr.md @@ -76,6 +76,81 @@ You can also copy the `thirdparty/`solr directory somewhere else, just set the path value in `mysite/_config.php` to point to the new location. And of course run `java -jar start.jar` from the new directory. +### Spell Checking ("Did you mean...") + +Solr has various spell checking strategies (see the ["SpellCheckComponent" docs](http://wiki.apache.org/solr/SpellCheckComponent)), all of which are configured through `solrconfig.xml`. +In the default config which is copied into your index, +spell checking data is collected from all fulltext fields +(everything you added through `SolrIndex->addFulltextField()`). +The values of these fields are collected in a special `_text` field. 
+ + $index = new MyIndex(); + $query = new SearchQuery(); + $query->search('My Term'); + $params = array('spellcheck' => 'true', 'spellcheck.collate' => 'true'); + $results = $index->search($query, -1, -1, $params); + $results->Suggestion + +The built-in `_text` data is better than nothing, but also has some problems: +It's heavily processed, for example by stemming filters which butcher words. +So misspelling "Govnernance" will suggest "govern" rather than "Governance". +This can be fixed by aggregating spell checking data in a separate field: + + addCopyField('SiteTree_Title', 'spellcheckData'); + $this->addCopyField('DMSDocument_Title', 'spellcheckData'); + $this->addCopyField('SiteTree_Content', 'spellcheckData'); + $this->addCopyField('DMSDocument_Content', 'spellcheckData'); + } + + // ... + + function getFieldDefinitions() { + $xml = parent::getFieldDefinitions(); + + $xml .= "\n\n\t\t"; + $xml .= "\n\t\t"; + + return $xml; + } + + } + +Now you need to tell Solr to use our new field for gathering spelling data. +In order to customize the spell checking configuration, +create your own `solrconfig.xml` (see "File-based configuration"). +In there, change the following directive: + + + + + spellcheckData