diff --git a/docs/en/00_index.md b/docs/en/00_index.md index c81efc8..f5f8da8 100644 --- a/docs/en/00_index.md +++ b/docs/en/00_index.md @@ -18,12 +18,13 @@ - Advanced configuration - [Facets](04_advanced_configuration.md#facets) - [Using multiple indexes](04_advanced_configuration.md#multiple-indexes) - - [Synonyms](04_advanced_configuration.md#synonyms) + - [Analyzers, tokenizers and token filters](04_advanced_configuration.md#analyzers,-tokenizers-and-token-filters) - [Spellcheck](04_advanced_configuration.md#spell-check-("did-you-mean...")) - [Highlighting](04_advanced_configuration.md#highlighting) - [Boosting](04_advanced_configuration.md#boosting) - [Indexing related objects](04_advanced_configuration.md#indexing-related-objects) - [Subsites](04_advanced_configuration.md#subsites) - [Custom field types](04_advanced_configuration.md#custom-field-types) + - [Text extraction](04_advanced_configuration.md#text-extraction) - Troubleshooting - [Gotchas](05_troubleshooting.md#common-gotchas) diff --git a/docs/en/01_getting_started.md b/docs/en/01_getting_started.md index eb05674..dc6d877 100644 --- a/docs/en/01_getting_started.md +++ b/docs/en/01_getting_started.md @@ -52,6 +52,15 @@ This will: - Create a DefaultIndex - Run a [Solr Configure](03_configuration.md#solr-configure) and a [Solr Reindex](03_configuration.md#solr-reindex) -You'll then need to build a search form and results display that suits the functionality of your site. +If you have the [CMS module](https://github.com/silverstripe/silverstripe-cms) installed, you will be able to simply add + `$SearchForm` to your template to add a Solr search form. Default configuration is added via the + [`ContentControllerExtension`](/src/Solr/Control/ContentControllerExtension.php) and alternative + [`SearchForm`](/src/Solr/Forms/SearchForm.php). 
With the + [Simple theme](https://github.com/silverstripe-themes/silverstripe-simple), this is in the + [`Header`](https://github.com/silverstripe-themes/silverstripe-simple/blob/master/templates/Includes/Header.ss#L10-L15) + by default. -// TODO update me when https://github.com/silverstripe/silverstripe-fulltextsearch/pull/216 is merged +Ensure that you _don't_ have `SilverStripe\ORM\Search\FulltextSearchable::enable()` set in `_config.php`, as the +`SearchForm` action provided by that class will conflict. + +You can override the default template with a new one at `templates/Layout/Page_results_solr.ss`. diff --git a/docs/en/03_configuration.md b/docs/en/03_configuration.md index 718274f..247a949 100644 --- a/docs/en/03_configuration.md +++ b/docs/en/03_configuration.md @@ -13,8 +13,8 @@ Solr::configure_server([ 'path' => '/solr', // The suburl the Solr service is available on 'version' => '4', // Solr server version - currently only 3 and 4 supported 'service' => 'Solr4Service', // The class that provides actual communcation to the Solr server - 'extraspath' => BASE_PATH .'/fulltextsearch/conf/solr/4/extras/', // Absolute path to the folder containing templates used for generating the schema and field definitions - 'templates' => BASE_PATH . '/fulltextsearch/conf/solr/4/templates/', // Absolute path to the configuration default files, e.g. solrconfig.xml + 'extraspath' => BASE_PATH .'/vendor/silverstripe/fulltextsearch/conf/solr/4/extras/', // Absolute path to the folder containing templates used for generating the schema and field definitions + 'templates' => BASE_PATH . '/vendor/silverstripe/fulltextsearch/conf/solr/4/templates/', // Absolute path to the configuration default files, e.g. 
solrconfig.xml 'indexstore' => [ 'mode' => NULL, // [REQUIRED] a classname which implements SolrConfigStore, or 'file' or 'webdav' 'path' => NULL, // [REQUIRED] The (locally accessible) path to write the index configurations to OR The suburl on the Solr host that is set up to accept index configurations via webdav (e.g. BASE_PATH . '/.solr') diff --git a/docs/en/04_advanced_configuration.md b/docs/en/04_advanced_configuration.md index a0cd754..f31c7d5 100644 --- a/docs/en/04_advanced_configuration.md +++ b/docs/en/04_advanced_configuration.md @@ -114,7 +114,77 @@ SilverStripe\FullTextSearch\Search\FullTextSearch: - CoreSearchIndex ``` -## Synonyms +## Analyzers, Tokenizers and Token Filters + +When a document is indexed, its individual fields are subject to the analyzing and tokenizing filters that can transform +and normalize the data in the fields. You can remove blank spaces, strip HTML, replace a particular character and much +more as described in the [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters). + +### Synonyms + +To add synonym processing at query-time, you can add the `SynonymFilterFactory` as an `Analyzer`: + +```php +use SilverStripe\FullTextSearch\Solr\SolrIndex; +use Page; + +class MyIndex extends SolrIndex +{ + public function init() + { + $this->addClass(Page::class); + $this->addField('Content'); + $this->addAnalyzer('Content', 'filter', [ + 'class' => 'solr.SynonymFilterFactory', + 'synonyms' => 'synonyms.txt', + 'ignoreCase' => 'true', + 'expand' => 'false' + ]); + } +} +``` + +This generates the following XML schema definition: + +```xml + + + +``` + +In this case, you most likely also want to define your own synonyms list. You can define a mapping in one of two ways: + +* A comma-separated list of words. If the token matches any of the words, then all the words in the list are +substituted, which will include the original token. + +* Two comma-separated lists of words with the symbol "=>" between them. 
If the token matches any word on +the left, then the list on the right is substituted. The original token will not be included unless it is also in the +list on the right. + +For example: + +```text +couch,sofa,lounger +teh => the +small => teeny,tiny,weeny +``` + +Then you should update your [Solr configuration](03_configuration.md#solr-server-parameters) to include your synonyms +file via the `extraspath` parameter, for example: + +```php +use SilverStripe\FullTextSearch\Solr\Solr; + +Solr::configure_server([ + 'extraspath' => BASE_PATH . '/mysite/Solr/', + 'indexstore' => [ + 'mode' => 'file', + 'path' => BASE_PATH . '/.solr', + ] +]); +``` + +This will include `/mysite/Solr/synonyms.txt` as your synonyms list after a [Solr configure](03_configuration.md#solr-configure). ## Spell check ("Did you mean...") @@ -340,3 +410,33 @@ Find the fields in your overloaded `types.ss` that you want to enable this behav ```xml ``` + +## Text extraction + +Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through +the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler)). +If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the +Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following +commands from the webroot: + +``` +wget http://archive.apache.org/dist/lucene/solr/4.10.4/solr-4.10.4.tgz +tar -xvzf solr-4.10.4.tgz +mkdir .solr/PageSolrIndexboot/dist +mkdir .solr/PageSolrIndexboot/contrib +cp solr-4.10.4/dist/solr-cell-4.10.4.jar .solr/PageSolrIndexboot/dist/ +cp -R solr-4.10.4/contrib/extraction .solr/PageSolrIndexboot/contrib/ +rm -rf solr-4.10.4 solr-4.10.4.tgz +``` + +Create a custom `solrconfig.xml` (see [File-based configuration](03_configuration.md#file-based-configuration)). 
+ +Add the following XML configuration: + +```xml + + +``` + +Now run a [Solr configure](03_configuration.md#solr-configure). You can use Solr text extraction either directly through +the HTTP API, or through the [Text extraction module](https://github.com/silverstripe-labs/silverstripe-textextraction). diff --git a/docs/en/Solr.md b/docs/en/Solr.md index da082aa..64627f9 100644 --- a/docs/en/Solr.md +++ b/docs/en/Solr.md @@ -1,69 +1,3 @@ -### Adding Analyzers, Tokenizers and Token Filters - -When a document is indexed, its individual fields are subject to the analyzing and tokenizing filters that can transform and normalize the data in the fields. For example — removing blank spaces, removing html code, stemming, removing a particular character and replacing it with another -(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)). - -Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad") - -```php -use SilverStripe\FullTextSearch\Solr\SolrIndex; - -class MyIndex extends SolrIndex -{ - public function init() - { - $this->addClass(Page::class); - $this->addField('Content'); - $this->addAnalyzer('Content', 'filter', ['class' => 'solr.SynonymFilterFactory']); - } -} -``` - -Generates the following XML schema definition: - -```xml - - - -``` - -### Text Extraction - -Solr provides built-in text extraction capabilities for PDF and Office documents, -and numerous other formats, through the `ExtractingRequestHandler` API -(see http://wiki.apache.org/solr/ExtractingRequestHandler). -If you're using a default Solr installation, it's most likely already -bundled and set up. But if you plan on running the Solr server integrated -into this module, you'll need to download the libraries and link the first. 
- -``` -wget http://archive.apache.org/dist/lucene/solr/3.1.0/apache-solr-3.1.0.tgz -mkdir tmp -tar -xvzf apache-solr-3.1.0.tgz -mkdir .solr/PageSolrIndexboot/dist -mkdir .solr/PageSolrIndexboot/contrib -cp apache-solr-3.1.0/dist/apache-solr-cell-3.1.0.jar .solr/PageSolrIndexboot/dist/ -cp -R apache-solr-3.1.0/contrib/extraction .solr/PageSolrIndexboot/contrib/ -rm -rf apache-solr-3.1.0 apache-solr-3.1.0.tgz -``` - -Create a custom `solrconfig.xml` (see "File-based configuration"). -Add the following XML configuration. - -```xml - - -``` - -Now apply the configuration: - -``` -vendor/bin/sake dev/tasks/Solr_Configure -``` - -Now you can use Solr text extraction either directly through the HTTP API, -or indirectly through the ["textextraction" module](https://github.com/silverstripe-labs/silverstripe-textextraction). - ## Adding DataObject classes to Solr search If you create a class that extends `DataObject` (and not `Page`) then it won't be automatically added to the search diff --git a/src/Solr/SolrIndex.php b/src/Solr/SolrIndex.php index 5195413..c25cde5 100644 --- a/src/Solr/SolrIndex.php +++ b/src/Solr/SolrIndex.php @@ -137,7 +137,7 @@ abstract class SolrIndex extends SearchIndex * * @param string $field * @param string $type - * @param Array $params Parameters for the analyzer, usually at least a "class" + * @param array $params parameters for the analyzer, usually at least a "class" */ public function addAnalyzer($field, $type, $params) {