diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php
index 68d4f0f..0dffeee 100644
--- a/code/solr/SolrIndex.php
+++ b/code/solr/SolrIndex.php
@@ -24,6 +24,8 @@ abstract class SolrIndex extends SearchIndex {
 
 	static $sortTypeMap = array();
 
+	protected $analyzerFields = array();
+
 	function generateSchema() {
 		return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/schema.ss');
 	}
@@ -36,6 +38,24 @@ abstract class SolrIndex extends SearchIndex {
 		return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/types.ss');
 	}
 
+	/**
+	 * Index-time analyzer which is applied to a specific field.
+	 * Can be used to remove HTML tags, apply stemming, etc.
+	 *
+	 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
+	 *
+	 * @param String $field
+	 * @param String $type Used as the XML tag name inside the analyzer, e.g. "charFilter" or "filter"
+	 * @param Array $params Parameters for the analyzer, usually at least a "class"
+	 */
+	function addAnalyzer($field, $type, $params) {
+		$fullFields = $this->fieldData($field);
+		if($fullFields) foreach($fullFields as $fullField => $spec) {
+			if(!isset($this->analyzerFields[$fullField])) $this->analyzerFields[$fullField] = array();
+			$this->analyzerFields[$fullField][$type] = $params;
+		}
+	}
+
 	function getFieldDefinitions() {
 		$xml = array();
 		$stored = Director::isDev() ? "stored='true'" : "stored='false'";
@@ -84,6 +104,13 @@ abstract class SolrIndex extends SearchIndex {
 		$multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : '';
 		$type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];
 
+		$analyzerXml = '';
+		if(isset($this->analyzerFields[$name])) {
+			foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
+				$analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
+			}
+		}
+
 		$fieldParams = array_merge(
 			array(
 				'name' => $name,
@@ -97,7 +124,8 @@ abstract class SolrIndex extends SearchIndex {
 
 		return $this->toXmlTag(
 			"field",
-			$fieldParams
+			$fieldParams,
+			$analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null
 		);
 	}
 
diff --git a/docs/Solr.md b/docs/Solr.md
index 088615d..de6ad79 100644
--- a/docs/Solr.md
+++ b/docs/Solr.md
@@ -76,6 +76,27 @@ You can also copy the `thirdparty/`solr directory somewhere else,
 just set the path value in `mysite/_config.php` to point to the new location.
 And of course run `java -jar start.jar` from the new directory.
 
+### Adding Analyzers, Tokenizers and Token Filters
+
+When a document is indexed, its individual fields are subject to analyzing and tokenizing filters that can transform and normalize the data in the fields. For example: removing blank spaces, removing HTML code, stemming, or replacing a particular character with another
+(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)).
+
+Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad")
+
+	<?php
+	class MyIndex extends SolrIndex {
+		function init() {
+			$this->addClass('Page');
+			$this->addField('Content');
+			$this->addAnalyzer('Content', 'filter', array('class' => 'solr.SynonymFilterFactory'));
+		}
+	}
+
+	// Generates the following XML schema definition:
+	// <field name="Page_Content" ...>
+	//   <analyzer><filter class="solr.SynonymFilterFactory" /></analyzer>
+	// </field>
+
 ## Debugging
 
 ### Using the web admin interface
@@ -103,4 +124,4 @@ In order to query the field, reverse the search conditions and exclude the range
 	// Wrong: Filter will ignore all empty field values
 	$myQuery->filter(<field>, new SearchQuery_Range('*', <date>));
 	// Better: Exclude the opposite range
-	$myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
\ No newline at end of file
+	$myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php
index afb8f2d..411b17b 100644
--- a/tests/SolrIndexTest.php
+++ b/tests/SolrIndexTest.php
@@ -65,6 +65,23 @@ class SolrIndexTest extends SapphireTest {
 
 		Director::set_environment_type($origMode);
 	}
+
+	function testAddAnalyzer() {
+		$index = new SolrIndexTest_FakeIndex();
+
+		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
+		$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
+		$analyzers = $defField1[0]->analyzer;
+		$this->assertFalse((bool)$analyzers);
+
+		$index->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory'));
+		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
+		$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
+		$analyzers = $defField1[0]->analyzer;
+		$this->assertTrue((bool)$analyzers);
+		$this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzers[0]->charFilter[0]['class']);
+	}
+
 	protected function getServiceMock() {
 		$serviceMock = Phockito::mock('SolrService');
 		$fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null));