Merge branch 'pull/field-defs-analyzers' of github.com:silverstripe-big-o/silverstripe-fulltextsearch into silverstripe-big-o-pull/field-defs-analyzers

This commit is contained in:
Sean Harvey 2013-02-13 10:40:08 +13:00
commit 3508aadf56
4 changed files with 176 additions and 24 deletions

View File

@ -47,7 +47,7 @@ abstract class SearchIndex extends ViewableData {
* Examines the classes this index is built on to try and find defined fields in the class hierarchy for those classes. * Examines the classes this index is built on to try and find defined fields in the class hierarchy for those classes.
* Looks for db and viewable-data fields, although can't necessarily find type for viewable-data fields. * Looks for db and viewable-data fields, although can't necessarily find type for viewable-data fields.
*/ */
function fieldData($field, $forceType = null) { function fieldData($field, $forceType = null, $extraOptions = array()) {
$fullfield = str_replace(".", "_", $field); $fullfield = str_replace(".", "_", $field);
$sources = $this->getClasses(); $sources = $this->getClasses();
@ -150,7 +150,8 @@ abstract class SearchIndex extends ViewableData {
'class' => $dataclass, 'class' => $dataclass,
'lookup_chain' => $fieldoptions['lookup_chain'], 'lookup_chain' => $fieldoptions['lookup_chain'],
'type' => $forceType ? $forceType : $type, 'type' => $forceType ? $forceType : $type,
'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false 'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false,
'extra_options' => $extraOptions
); );
} }
} }
@ -202,9 +203,10 @@ abstract class SearchIndex extends ViewableData {
* Add a field that should be fulltext searchable * Add a field that should be fulltext searchable
* @param String $field - The field to add * @param String $field - The field to add
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
* @param Array $extraOptions - Extra field options, dependent on search implementation
*/ */
public function addFulltextField($field, $forceType = null) { public function addFulltextField($field, $forceType = null, $extraOptions = array()) {
$this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType)); $this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType, $extraOptions));
} }
public function getFulltextFields() { return $this->fulltextFields; } public function getFulltextFields() { return $this->fulltextFields; }
@ -213,9 +215,10 @@ abstract class SearchIndex extends ViewableData {
* Add a field that should be filterable * Add a field that should be filterable
* @param String $field - The field to add * @param String $field - The field to add
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
* @param Array $extraOptions - Extra field options, dependent on search implementation
*/ */
public function addFilterField($field, $forceType = null) { public function addFilterField($field, $forceType = null, $extraOptions = array()) {
$this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType)); $this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType, $extraOptions));
} }
public function getFilterFields() { return $this->filterFields; } public function getFilterFields() { return $this->filterFields; }
@ -224,9 +227,10 @@ abstract class SearchIndex extends ViewableData {
* Add a field that should be sortable * Add a field that should be sortable
* @param String $field - The field to add * @param String $field - The field to add
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata) * @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
* @param Array $extraOptions - Extra field options, dependent on search implementation
*/ */
public function addSortField($field, $forceType = null) { public function addSortField($field, $forceType = null, $extraOptions = array()) {
$this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType)); $this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType, $extraOptions));
} }
public function getSortFields() { return $this->sortFields; } public function getSortFields() { return $this->sortFields; }

View File

@ -24,6 +24,10 @@ abstract class SolrIndex extends SearchIndex {
static $sortTypeMap = array(); static $sortTypeMap = array();
protected $analyzerFields = array();
protected $copyFields = array();
protected $extrasPath = null; protected $extrasPath = null;
protected $templatesPath = null; protected $templatesPath = null;
@ -55,6 +59,24 @@ abstract class SolrIndex extends SearchIndex {
return $this->renderWith($this->getTemplatesPath() . '/types.ss'); return $this->renderWith($this->getTemplatesPath() . '/types.ss');
} }
/**
 * Registers an index-time analyzer component for a field.
 * Useful for stripping HTML tags, applying stemming, etc.
 *
 * The field name is resolved through fieldData(), and the component is stored
 * against every resolved field name. Components are keyed by $type, so a second
 * call with the same $type for the same field replaces the earlier parameters.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param String $field Field name, as accepted by fieldData()
 * @param String $type Tag name of the analyzer component (e.g. "charFilter", "filter")
 * @param Array $params Parameters for the analyzer, usually at least a "class"
 */
function addAnalyzer($field, $type, $params) {
	$resolvedFields = $this->fieldData($field);

	// Nothing resolved for this field name: nothing to register
	if(!$resolvedFields) {
		return;
	}

	foreach($resolvedFields as $resolvedName => $unusedSpec) {
		if(!isset($this->analyzerFields[$resolvedName])) {
			$this->analyzerFields[$resolvedName] = array();
		}
		$this->analyzerFields[$resolvedName][$type] = $params;
	}
}
function getFieldDefinitions() { function getFieldDefinitions() {
$xml = array(); $xml = array();
$stored = Director::isDev() ? "stored='true'" : "stored='false'"; $stored = Director::isDev() ? "stored='true'" : "stored='false'";
@ -76,33 +98,89 @@ abstract class SolrIndex extends SearchIndex {
// Add the user-specified fields // Add the user-specified fields
foreach ($this->fulltextFields as $name => $field) { foreach ($this->fulltextFields as $name => $field) {
$type = isset(self::$fulltextTypeMap[$field['type']]) ? self::$fulltextTypeMap[$field['type']] : self::$fulltextTypeMap['*']; $xml[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
$xml[] = "<field name='{$name}' type='$type' indexed='true' $stored />";
} }
foreach ($this->filterFields as $name => $field) { foreach ($this->filterFields as $name => $field) {
if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue; if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue;
$xml[] = $this->getFieldDefinition($name, $field);
$multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : '';
$type = isset(self::$filterTypeMap[$field['type']]) ? self::$filterTypeMap[$field['type']] : self::$filterTypeMap['*'];
$xml[] = "<field name='{$name}' type='{$type}' indexed='true' $stored $multiValued />";
} }
foreach ($this->sortFields as $name => $field) { foreach ($this->sortFields as $name => $field) {
if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue; if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue;
$xml[] = $this->getFieldDefinition($name, $field);
$multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : '';
$typeMap = array_merge(self::$filterTypeMap, self::$sortTypeMap);
$type = isset($typeMap[$field['type']]) ? $typeMap[$field['type']] : $typeMap['*'];
$xml[] = "<field name='{$name}' type='{$type}' indexed='true' $stored $multiValued />";
} }
return implode("\n\t\t", $xml); return implode("\n\t\t", $xml);
} }
/**
 * Builds the schema XML for a single field.
 *
 * The Solr type is looked up from $typeMap by the spec's 'type', falling back
 * to the map's '*' entry. Any analyzers registered for $name are rendered
 * inside an <analyzer> wrapper as the field's content. Entries in the spec's
 * 'extra_options' override the default attributes of the same name.
 *
 * @param String $name Full field name, used as the "name" attribute
 * @param Array $spec Field specification ('type', optional 'multi_valued' and 'extra_options')
 * @param Array $typeMap Map of field type to Solr type; defaults to the filter type map
 * @return String XML
 */
protected function getFieldDefinition($name, $spec, $typeMap = null) {
	if(!$typeMap) $typeMap = self::$filterTypeMap;
	$type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];

	$analyzerXml = '';
	if(isset($this->analyzerFields[$name])) {
		foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
			$analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
		}
	}

	$defaultParams = array(
		'name' => $name,
		'type' => $type,
		'indexed' => 'true',
		'stored' => Director::isDev() ? 'true' : 'false'
	);

	// Only emit multiValued when it is actually set. An empty multiValued=''
	// attribute would explicitly mark the field as single-valued instead of
	// leaving the decision to the field type.
	if(isset($spec['multi_valued']) && $spec['multi_valued']) {
		$defaultParams['multiValued'] = 'true';
	}

	$fieldParams = array_merge(
		$defaultParams,
		isset($spec['extra_options']) ? $spec['extra_options'] : array()
	);

	return $this->toXmlTag(
		"field",
		$fieldParams,
		$analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null
	);
}
/**
 * Convert definition to XML tag
 *
 * Attribute values are XML-escaped so that characters such as & < > ' "
 * (e.g. in analyzer patterns) cannot produce malformed markup.
 * $content is inserted as-is, since callers pass already-built XML.
 *
 * @param String $tag Tag name
 * @param Array $attrs Map of attribute names to values
 * @param String $content Inner content (raw XML); when empty the tag is self-closing
 * @return String XML tag
 */
protected function toXmlTag($tag, $attrs, $content = null) {
	$xml = "<$tag ";
	if($attrs) {
		$attrStrs = array();
		foreach($attrs as $attrName => $attrVal) {
			// Values are wrapped in single quotes, so ENT_QUOTES is required to escape them too
			$attrStrs[] = $attrName . "='" . htmlspecialchars((string)$attrVal, ENT_QUOTES, 'UTF-8') . "'";
		}
		$xml .= implode(' ', $attrStrs);
	}
	$xml .= $content ? ">$content</$tag>" : '/>';
	return $xml;
}
/**
 * Registers an additional copyField definition for the schema.
 * Several destinations may be registered for the same source; each call
 * appends one definition.
 *
 * @param String $source Composite field name (<class>_<fieldname>)
 * @param String $dest Name of the field to copy into
 * @param Array $extraOptions Further attributes, merged over 'source' and 'dest'
 */
function addCopyField($source, $dest, $extraOptions = array()) {
	if(!isset($this->copyFields[$source])) {
		$this->copyFields[$source] = array();
	}

	$baseAttrs = array('source' => $source, 'dest' => $dest);
	$definition = array_merge($baseAttrs, $extraOptions);

	$this->copyFields[$source][] = $definition;
}
function getCopyFieldDefinitions() { function getCopyFieldDefinitions() {
$xml = array(); $xml = array();
@ -110,6 +188,12 @@ abstract class SolrIndex extends SearchIndex {
$xml[] = "<copyField source='{$name}' dest='_text' />"; $xml[] = "<copyField source='{$name}' dest='_text' />";
} }
foreach ($this->copyFields as $source => $fields) {
foreach($fields as $fieldAttrs) {
$xml[] = $this->toXmlTag('copyField', $fieldAttrs);
}
}
return implode("\n\t", $xml); return implode("\n\t", $xml);
} }

View File

@ -285,6 +285,27 @@ The searched term is highlighted with an `<em>` tag by default.
Note: It is recommended to strip out all HTML tags and convert entities on the indexed content, Note: It is recommended to strip out all HTML tags and convert entities on the indexed content,
to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML. to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
### Adding Analyzers, Tokenizers and Token Filters
When a document is indexed, its individual fields are passed through analyzing and tokenizing filters that can transform and normalize the data in the fields: for example, removing blank spaces, stripping HTML, stemming, or replacing one character with another
(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)).
Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad")
<?php
class MyIndex extends SolrIndex {
function init() {
$this->addClass('Page');
$this->addField('Content');
$this->addAnalyzer('Content', 'filter', array(
'class' => 'solr.SynonymFilterFactory',
'synonyms' => 'syn.txt',
'ignoreCase' => 'true',
'expand' => 'false'
));
}
}
// Generates the following XML schema definition:
// <field name="Page_Content" ...>
// <analyzer><filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/></analyzer>
// </field>
## Debugging ## Debugging
### Using the web admin interface ### Using the web admin interface

View File

@ -49,8 +49,51 @@ class SolrIndexTest extends SapphireTest {
$this->assertEquals('2010-12-30T00:00:00Z', $value['value'], 'Writes non-NULL dates'); $this->assertEquals('2010-12-30T00:00:00Z', $value['value'], 'Writes non-NULL dates');
} }
protected function getServiceMock() { function testAddFieldExtraOptions() {
$serviceMock = Phockito::mock('SolrService'); $origMode = Director::get_environment_type();
Director::set_environment_type('live'); // dev mode would for stored=true for everything
$index = new SolrIndexTest_FakeIndex();
$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
$this->assertEquals((string)$defField1[0]['stored'], 'false');
$index->addFilterField('Field1', null, array('stored' => 'true'));
$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
$this->assertEquals((string)$defField1[0]['stored'], 'true');
Director::set_environment_type($origMode);
}
/**
 * A field has no <analyzer> element until addAnalyzer() is called for it;
 * afterwards the registered component appears inside the field's <analyzer>.
 */
function testAddAnalyzer() {
	$index = new SolrIndexTest_FakeIndex();
	$fieldXpath = 'field[@name="SearchUpdaterTest_Container_Field1"]';

	// Before: the field definition carries no analyzer
	$schemaBefore = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
	$matchesBefore = $schemaBefore->xpath($fieldXpath);
	$this->assertFalse((bool)$matchesBefore[0]->analyzer);

	$index->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory'));

	// After: the analyzer wraps the registered charFilter
	$schemaAfter = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
	$matchesAfter = $schemaAfter->xpath($fieldXpath);
	$analyzerNodes = $matchesAfter[0]->analyzer;
	$this->assertTrue((bool)$analyzerNodes);
	$this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzerNodes[0]->charFilter[0]['class']);
}
/**
 * A copy field registered through addCopyField() is rendered by
 * getCopyFieldDefinitions() with its source and dest attributes.
 */
function testAddCopyField() {
	$index = new SolrIndexTest_FakeIndex();
	$index->addCopyField('sourceField', 'destField');
	$defs = simplexml_load_string('<fields>' . $index->getCopyFieldDefinitions() . '</fields>');

	// array_pop() needs a real array; a SimpleXMLElement is not one.
	// xpath() returns the matching elements as a plain array.
	$copyFields = $defs->xpath('copyField');
	$this->assertTrue((bool)$copyFields, 'At least one copyField definition is rendered');

	// Custom copy fields are rendered after the default ones, so ours is last
	$lastDef = array_pop($copyFields);
	$this->assertEquals('sourceField', (string)$lastDef['source']);
	$this->assertEquals('destField', (string)$lastDef['dest']);
}
protected function getServiceSpy() {
$serviceSpy = Phockito::spy('SolrService');
$fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null)); $fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null));
Phockito::when($serviceMock) Phockito::when($serviceMock)