From b6fd7b7b899bd264da9252779f8db1d39c2ea7e3 Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Mon, 3 Sep 2012 22:52:44 +0200 Subject: [PATCH 1/3] ENHANCEMENT Extra field definition options for SolrIndex --- code/search/SearchIndex.php | 20 +++++++----- code/solr/SolrIndex.php | 64 +++++++++++++++++++++++++++++-------- tests/SolrIndexTest.php | 16 ++++++++++ 3 files changed, 78 insertions(+), 22 deletions(-) diff --git a/code/search/SearchIndex.php b/code/search/SearchIndex.php index 81f5993..fd3904b 100644 --- a/code/search/SearchIndex.php +++ b/code/search/SearchIndex.php @@ -47,7 +47,7 @@ abstract class SearchIndex extends ViewableData { * Examines the classes this index is built on to try and find defined fields in the class hierarchy for those classes. * Looks for db and viewable-data fields, although can't nessecarily find type for viewable-data fields. */ - function fieldData($field, $forceType = null) { + function fieldData($field, $forceType = null, $extraOptions = array()) { $fullfield = str_replace(".", "_", $field); $sources = $this->getClasses(); @@ -150,7 +150,8 @@ abstract class SearchIndex extends ViewableData { 'class' => $dataclass, 'lookup_chain' => $fieldoptions['lookup_chain'], 'type' => $forceType ? $forceType : $type, - 'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false + 'multi_valued' => isset($fieldoptions['multi_valued']) ? 
true : false, + 'extra_options' => $extraOptions ); } } @@ -200,9 +201,10 @@ abstract class SearchIndex extends ViewableData { * Add a field that should be fulltext searchable * @param String $field - The field to add * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) + * @param Array $extraOptions - Extra field options; meaning depends on the search implementation */ - public function addFulltextField($field, $forceType = null) { - $this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType)); + public function addFulltextField($field, $forceType = null, $extraOptions = array()) { + $this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType, $extraOptions)); } public function getFulltextFields() { return $this->fulltextFields; } @@ -211,9 +213,10 @@ abstract class SearchIndex extends ViewableData { * Add a field that should be filterable * @param String $field - The field to add * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) + * @param Array $extraOptions - Extra field options; meaning depends on the search implementation */ - public function addFilterField($field, $forceType = null) { - $this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType)); + public function addFilterField($field, $forceType = null, $extraOptions = array()) { + $this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType, $extraOptions)); } public function getFilterFields() { return $this->filterFields; } @@ -222,9 +225,10 @@ abstract class SearchIndex extends ViewableData { * Add a field that should be sortable * @param String $field - The field to add * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) + * @param Array $extraOptions - Extra field options; meaning depends on the search implementation */ - public function 
addSortField($field, $forceType = null) { - $this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType)); + public function addSortField($field, $forceType = null, $extraOptions = array()) { + $this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType, $extraOptions)); } public function getSortFields() { return $this->sortFields; } diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php index d56c83c..68d4f0f 100644 --- a/code/solr/SolrIndex.php +++ b/code/solr/SolrIndex.php @@ -57,33 +57,69 @@ abstract class SolrIndex extends SearchIndex { // Add the user-specified fields foreach ($this->fulltextFields as $name => $field) { - $type = isset(self::$fulltextTypeMap[$field['type']]) ? self::$fulltextTypeMap[$field['type']] : self::$fulltextTypeMap['*']; - $xml[] = ""; + $xml[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap); } foreach ($this->filterFields as $name => $field) { if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue; - - $multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : ''; - - $type = isset(self::$filterTypeMap[$field['type']]) ? self::$filterTypeMap[$field['type']] : self::$filterTypeMap['*']; - $xml[] = ""; + $xml[] = $this->getFieldDefinition($name, $field); } foreach ($this->sortFields as $name => $field) { if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue; - - $multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : ''; - - $typeMap = array_merge(self::$filterTypeMap, self::$sortTypeMap); - $type = isset($typeMap[$field['type']]) ? 
$typeMap[$field['type']] : $typeMap['*']; - - $xml[] = ""; + $xml[] = $this->getFieldDefinition($name, $field); } return implode("\n\t\t", $xml); } + /** + * @param String $name + * @param Array $spec + * @param Array $typeMap + * @return String XML + */ + protected function getFieldDefinition($name, $spec, $typeMap = null) { + if(!$typeMap) $typeMap = self::$filterTypeMap; + $multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : ''; + $type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*']; + + $fieldParams = array_merge( + array( + 'name' => $name, + 'type' => $type, + 'indexed' => 'true', + 'stored' => Director::isDev() ? 'true' : 'false', + 'multiValued' => $multiValued + ), + isset($spec['extra_options']) ? $spec['extra_options'] : array() + ); + + return $this->toXmlTag( + "field", + $fieldParams + ); + } + + /** + * Convert definition to XML tag + * + * @param String $tag + * @param Array $attrs Map of attributes + * @param String $content Inner content + * @return String XML tag + */ + protected function toXmlTag($tag, $attrs, $content = null) { + $xml = "<$tag "; + if($attrs) { + $attrStrs = array(); + foreach($attrs as $attrName => $attrVal) $attrStrs[] = "$attrName='$attrVal'"; + $xml .= $attrStrs ? implode(' ', $attrStrs) : ''; + } + $xml .= $content ? ">$content" : '/>'; + return $xml; + } + function getCopyFieldDefinitions() { $xml = array(); diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php index 345dd7b..afb8f2d 100644 --- a/tests/SolrIndexTest.php +++ b/tests/SolrIndexTest.php @@ -49,6 +49,22 @@ class SolrIndexTest extends SapphireTest { $this->assertEquals('2010-12-30T00:00:00Z', $value['value'], 'Writes non-NULL dates'); } + function testAddFieldExtraOptions() { + $origMode = Director::get_environment_type(); + Director::set_environment_type('live'); // dev mode would force stored=true for everything + $index = new SolrIndexTest_FakeIndex(); + + $defs = simplexml_load_string('' . 
$index->getFieldDefinitions() . ''); + $defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]'); + $this->assertEquals((string)$defField1[0]['stored'], 'false'); + + $index->addFilterField('Field1', null, array('stored' => 'true')); + $defs = simplexml_load_string('' . $index->getFieldDefinitions() . ''); + $defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]'); + $this->assertEquals((string)$defField1[0]['stored'], 'true'); + + Director::set_environment_type($origMode); + } protected function getServiceMock() { $serviceMock = Phockito::mock('SolrService'); $fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null)); From 0ef78f905c7b26a6616e3568a671735fafdbe65d Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Mon, 3 Sep 2012 22:53:23 +0200 Subject: [PATCH 2/3] ENHANCEMENT Per-field analyser support for SolrIndex --- code/solr/SolrIndex.php | 30 +++++++++++++++++++++++++++++- docs/Solr.md | 23 ++++++++++++++++++++++- tests/SolrIndexTest.php | 17 +++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php index 68d4f0f..0dffeee 100644 --- a/code/solr/SolrIndex.php +++ b/code/solr/SolrIndex.php @@ -24,6 +24,8 @@ abstract class SolrIndex extends SearchIndex { static $sortTypeMap = array(); + protected $analyzerFields = array(); + function generateSchema() { return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/schema.ss'); } @@ -36,6 +38,24 @@ abstract class SolrIndex extends SearchIndex { return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/types.ss'); } + /** + * Index-time analyzer which is applied to a specific field. + * Can be used to remove HTML tags, apply stemming, etc. 
+ * + * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory + * + * @param String $field + * @param String $type + * @param Array $params Parameters for the analyzer, usually at least a "class" + */ + function addAnalyzer($field, $type, $params) { + $fullFields = $this->fieldData($field); + if($fullFields) foreach($fullFields as $fullField => $spec) { + if(!isset($this->analyzerFields[$fullField])) $this->analyzerFields[$fullField] = array(); + $this->analyzerFields[$fullField][$type] = $params; + } + } + function getFieldDefinitions() { $xml = array(); $stored = Director::isDev() ? "stored='true'" : "stored='false'"; @@ -84,6 +104,13 @@ abstract class SolrIndex extends SearchIndex { $multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : ''; $type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*']; + $analyzerXml = ''; + if(isset($this->analyzerFields[$name])) { + foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) { + $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams); + } + } + $fieldParams = array_merge( array( 'name' => $name, @@ -97,7 +124,8 @@ abstract class SolrIndex extends SearchIndex { return $this->toXmlTag( "field", - $fieldParams + $fieldParams, + $analyzerXml ? "$analyzerXml" : null ); } diff --git a/docs/Solr.md b/docs/Solr.md index 088615d..de6ad79 100644 --- a/docs/Solr.md +++ b/docs/Solr.md @@ -76,6 +76,27 @@ You can also copy the `thirdparty/`solr directory somewhere else, just set the path value in `mysite/_config.php` to point to the new location. And of course run `java -jar start.jar` from the new directory. +### Adding Analyzers, Tokenizers and Token Filters + +When a document is indexed, its individual fields are subject to the analyzing and tokenizing filters that can transform and normalize the data in the fields. 
For example: removing blank spaces, stripping HTML code, stemming, or replacing a particular character with another +(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)). + +Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad") + + addClass('Page'); + $this->addField('Content'); + $this->addAnalyzer('Content', 'filter', array('class' => 'solr.SynonymFilterFactory')); + } + } + + // Generates the following XML schema definition: + // + // + // + ## Debugging ### Using the web admin interface @@ -103,4 +124,4 @@ In order to query the field, reverse the search conditions and exclude the range // Wrong: Filter will ignore all empty field values $myQuery->filter(, new SearchQuery_Range('*', )); // Better: Exclude the opposite range - $myQuery->exclude(, new SearchQuery_Range(, '*')); \ No newline at end of file + $myQuery->exclude(, new SearchQuery_Range(, '*')); diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php index afb8f2d..411b17b 100644 --- a/tests/SolrIndexTest.php +++ b/tests/SolrIndexTest.php @@ -65,6 +65,23 @@ class SolrIndexTest extends SapphireTest { Director::set_environment_type($origMode); } + + function testAddAnalyzer() { + $index = new SolrIndexTest_FakeIndex(); + + $defs = simplexml_load_string('' . $index->getFieldDefinitions() . ''); + $defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]'); + $analyzers = $defField1[0]->analyzer; + $this->assertFalse((bool)$analyzers); + + $index->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory')); + $defs = simplexml_load_string('' . $index->getFieldDefinitions() . 
''); + $defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]'); + $analyzers = $defField1[0]->analyzer; + $this->assertTrue((bool)$analyzers); + $this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzers[0]->charFilter[0]['class']); + } + protected function getServiceMock() { $serviceMock = Phockito::mock('SolrService'); $fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null)); From 874bd32300bf6735685b9dbabeee826e863d61a2 Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Wed, 5 Sep 2012 18:16:40 +0200 Subject: [PATCH 3/3] ENHANCEMENT Solr->addCopyFields() --- code/solr/SolrIndex.php | 20 ++++++++++++++++++++ tests/SolrIndexTest.php | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php index 0dffeee..87a7bc1 100644 --- a/code/solr/SolrIndex.php +++ b/code/solr/SolrIndex.php @@ -26,6 +26,8 @@ abstract class SolrIndex extends SearchIndex { protected $analyzerFields = array(); + protected $copyFields = array(); + function generateSchema() { return $this->renderWith(Director::baseFolder() . 
'/fulltextsearch/conf/templates/schema.ss'); } @@ -148,6 +150,18 @@ abstract class SolrIndex extends SearchIndex { return $xml; } + /** + * @param String $source Composite field name (_) + * @param String $dest + */ + function addCopyField($source, $dest, $extraOptions = array()) { + if(!isset($this->copyFields[$source])) $this->copyFields[$source] = array(); + $this->copyFields[$source][] = array_merge( + array('source' => $source, 'dest' => $dest), + $extraOptions + ); + } + function getCopyFieldDefinitions() { $xml = array(); @@ -155,6 +169,12 @@ abstract class SolrIndex extends SearchIndex { $xml[] = ""; } + foreach ($this->copyFields as $source => $fields) { + foreach($fields as $fieldAttrs) { + $xml[] = $this->toXmlTag('copyField', $fieldAttrs); + } + } + return implode("\n\t", $xml); } diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php index 411b17b..3abddb1 100644 --- a/tests/SolrIndexTest.php +++ b/tests/SolrIndexTest.php @@ -82,8 +82,18 @@ class SolrIndexTest extends SapphireTest { $this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzers[0]->charFilter[0]['class']); } - protected function getServiceMock() { - $serviceMock = Phockito::mock('SolrService'); + function testAddCopyField() { + $index = new SolrIndexTest_FakeIndex(); + $index->addCopyField('sourceField', 'destField'); + $defs = simplexml_load_string('' . $index->getCopyFieldDefinitions() . ''); + $lastDef = array_pop($defs); + + $this->assertEquals('sourceField', $lastDef['source']); + $this->assertEquals('destField', $lastDef['dest']); + } + + protected function getServiceSpy() { + $serviceSpy = Phockito::spy('SolrService'); $fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null)); Phockito::when($serviceMock)