mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 12:05:29 +00:00
Merge branch 'pull/field-defs-analyzers' of github.com:silverstripe-big-o/silverstripe-fulltextsearch into silverstripe-big-o-pull/field-defs-analyzers
This commit is contained in:
commit
3508aadf56
@ -47,7 +47,7 @@ abstract class SearchIndex extends ViewableData {
|
||||
* Examines the classes this index is built on to try and find defined fields in the class hierarchy for those classes.
|
||||
* Looks for db and viewable-data fields, although can't necessarily find type for viewable-data fields.
|
||||
*/
|
||||
function fieldData($field, $forceType = null) {
|
||||
function fieldData($field, $forceType = null, $extraOptions = array()) {
|
||||
$fullfield = str_replace(".", "_", $field);
|
||||
$sources = $this->getClasses();
|
||||
|
||||
@ -150,7 +150,8 @@ abstract class SearchIndex extends ViewableData {
|
||||
'class' => $dataclass,
|
||||
'lookup_chain' => $fieldoptions['lookup_chain'],
|
||||
'type' => $forceType ? $forceType : $type,
|
||||
'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false
|
||||
'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false,
|
||||
'extra_options' => $extraOptions
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -202,9 +203,10 @@ abstract class SearchIndex extends ViewableData {
|
||||
* Add a field that should be fulltext searchable
|
||||
* @param String $field - The field to add
|
||||
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
|
||||
* @param Array $extraOptions - Extra options stored with the field definition; interpretation depends on the search implementation
|
||||
*/
|
||||
public function addFulltextField($field, $forceType = null) {
|
||||
$this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType));
|
||||
public function addFulltextField($field, $forceType = null, $extraOptions = array()) {
|
||||
$this->fulltextFields = array_merge($this->fulltextFields, $this->fieldData($field, $forceType, $extraOptions));
|
||||
}
|
||||
|
||||
public function getFulltextFields() { return $this->fulltextFields; }
|
||||
@ -213,9 +215,10 @@ abstract class SearchIndex extends ViewableData {
|
||||
* Add a field that should be filterable
|
||||
* @param String $field - The field to add
|
||||
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
|
||||
* @param Array $extraOptions - Extra options stored with the field definition; interpretation depends on the search implementation
|
||||
*/
|
||||
public function addFilterField($field, $forceType = null) {
|
||||
$this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType));
|
||||
public function addFilterField($field, $forceType = null, $extraOptions = array()) {
|
||||
$this->filterFields = array_merge($this->filterFields, $this->fieldData($field, $forceType, $extraOptions));
|
||||
}
|
||||
|
||||
public function getFilterFields() { return $this->filterFields; }
|
||||
@ -224,9 +227,10 @@ abstract class SearchIndex extends ViewableData {
|
||||
* Add a field that should be sortable
|
||||
* @param String $field - The field to add
|
||||
* @param String $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
|
||||
* @param Array $extraOptions - Extra options stored with the field definition; interpretation depends on the search implementation
|
||||
*/
|
||||
public function addSortField($field, $forceType = null) {
|
||||
$this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType));
|
||||
public function addSortField($field, $forceType = null, $extraOptions = array()) {
|
||||
$this->sortFields = array_merge($this->sortFields, $this->fieldData($field, $forceType, $extraOptions));
|
||||
}
|
||||
|
||||
public function getSortFields() { return $this->sortFields; }
|
||||
|
@ -24,6 +24,10 @@ abstract class SolrIndex extends SearchIndex {
|
||||
|
||||
static $sortTypeMap = array();
|
||||
|
||||
protected $analyzerFields = array();
|
||||
|
||||
protected $copyFields = array();
|
||||
|
||||
protected $extrasPath = null;
|
||||
|
||||
protected $templatesPath = null;
|
||||
@ -55,6 +59,24 @@ abstract class SolrIndex extends SearchIndex {
|
||||
return $this->renderWith($this->getTemplatesPath() . '/types.ss');
|
||||
}
|
||||
|
||||
/**
 * Registers an index-time analyzer component for a specific field,
 * e.g. to remove HTML tags or apply stemming.
 *
 * The field is resolved through fieldData(); the parameters are recorded
 * against every resolved field name. Registering the same $type twice for
 * a field replaces the parameters stored earlier for that type.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param String $field Field to attach the analyzer component to
 * @param String $type Component type, used as the XML tag name (e.g. "charFilter")
 * @param Array $params Parameters for the analyzer, usually at least a "class"
 */
function addAnalyzer($field, $type, $params) {
	$resolved = $this->fieldData($field);
	if(!$resolved) {
		// Nothing matched, so there is nothing to register against
		return;
	}

	foreach($resolved as $resolvedName => $unusedSpec) {
		if(!isset($this->analyzerFields[$resolvedName])) {
			$this->analyzerFields[$resolvedName] = array();
		}
		$this->analyzerFields[$resolvedName][$type] = $params;
	}
}
|
||||
|
||||
function getFieldDefinitions() {
|
||||
$xml = array();
|
||||
$stored = Director::isDev() ? "stored='true'" : "stored='false'";
|
||||
@ -76,33 +98,89 @@ abstract class SolrIndex extends SearchIndex {
|
||||
// Add the user-specified fields
|
||||
|
||||
foreach ($this->fulltextFields as $name => $field) {
|
||||
$type = isset(self::$fulltextTypeMap[$field['type']]) ? self::$fulltextTypeMap[$field['type']] : self::$fulltextTypeMap['*'];
|
||||
$xml[] = "<field name='{$name}' type='$type' indexed='true' $stored />";
|
||||
$xml[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
|
||||
}
|
||||
|
||||
foreach ($this->filterFields as $name => $field) {
|
||||
if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue;
|
||||
|
||||
$multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : '';
|
||||
|
||||
$type = isset(self::$filterTypeMap[$field['type']]) ? self::$filterTypeMap[$field['type']] : self::$filterTypeMap['*'];
|
||||
$xml[] = "<field name='{$name}' type='{$type}' indexed='true' $stored $multiValued />";
|
||||
$xml[] = $this->getFieldDefinition($name, $field);
|
||||
}
|
||||
|
||||
foreach ($this->sortFields as $name => $field) {
|
||||
if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') continue;
|
||||
|
||||
$multiValued = (isset($field['multi_valued']) && $field['multi_valued']) ? "multiValued='true'" : '';
|
||||
|
||||
$typeMap = array_merge(self::$filterTypeMap, self::$sortTypeMap);
|
||||
$type = isset($typeMap[$field['type']]) ? $typeMap[$field['type']] : $typeMap['*'];
|
||||
|
||||
$xml[] = "<field name='{$name}' type='{$type}' indexed='true' $stored $multiValued />";
|
||||
$xml[] = $this->getFieldDefinition($name, $field);
|
||||
}
|
||||
|
||||
return implode("\n\t\t", $xml);
|
||||
}
|
||||
|
||||
/**
 * Builds the schema <field> tag for a single indexed field, including an
 * <analyzer> child element when analyzers were registered for that field.
 *
 * @param String $name Field name; used as the "name" attribute and as the key for analyzer lookup
 * @param Array $spec Field specification; reads 'type' and, when present, 'multi_valued' and 'extra_options'
 * @param Array $typeMap Map of field type to schema type with a '*' fallback entry;
 *                       defaults to self::$filterTypeMap when empty
 * @return String XML
 */
protected function getFieldDefinition($name, $spec, $typeMap = null) {
	if(!$typeMap) $typeMap = self::$filterTypeMap;
	$type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];

	// Each registered analyzer component becomes one child tag of <analyzer>
	$analyzerXml = '';
	if(isset($this->analyzerFields[$name])) {
		foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
			$analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
		}
	}

	$defaultParams = array(
		'name' => $name,
		'type' => $type,
		'indexed' => 'true',
		'stored' => Director::isDev() ? 'true' : 'false'
	);

	// Only emit multiValued when it is actually set: an empty attribute value
	// is not a boolean literal, so leave the attribute out entirely instead.
	if(isset($spec['multi_valued']) && $spec['multi_valued']) {
		$defaultParams['multiValued'] = 'true';
	}

	// Extra options are merged last so that they can override the defaults above
	$fieldParams = array_merge(
		$defaultParams,
		isset($spec['extra_options']) ? $spec['extra_options'] : array()
	);

	return $this->toXmlTag(
		"field",
		$fieldParams,
		$analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null
	);
}
|
||||
|
||||
/**
 * Convert definition to XML tag.
 *
 * Attribute values are expected as raw (unescaped) strings; they are
 * escaped here so that quotes, ampersands and angle brackets cannot break
 * the generated markup. Tag name, attribute names and $content are written
 * as given.
 *
 * @param String $tag Tag name
 * @param Array $attrs Map of attribute name to raw attribute value
 * @param String $content Inner content (already valid XML); an empty value yields a self-closing tag
 * @return String XML tag
 */
protected function toXmlTag($tag, $attrs, $content = null) {
	$xml = "<$tag ";
	if($attrs) {
		$attrStrs = array();
		foreach($attrs as $attrName => $attrVal) {
			// Values are wrapped in single quotes, so ENT_QUOTES is required
			$escapedVal = htmlspecialchars((string)$attrVal, ENT_QUOTES, 'UTF-8');
			$attrStrs[] = "$attrName='$escapedVal'";
		}
		$xml .= implode(' ', $attrStrs);
	}
	$xml .= $content ? ">$content</$tag>" : '/>';
	return $xml;
}
|
||||
|
||||
/**
 * Registers a copy-field definition for the given source field.
 * Several definitions may be registered for the same source.
 *
 * @param String $source Composite field name (<class>_<fieldname>)
 * @param String $dest Name of the field to copy into
 * @param Array $extraOptions Additional entries merged into the definition;
 *                            'source' and 'dest' keys given here take precedence
 */
function addCopyField($source, $dest, $extraOptions = array()) {
	if(!isset($this->copyFields[$source])) {
		$this->copyFields[$source] = array();
	}

	$definition = array_merge(
		array('source' => $source, 'dest' => $dest),
		$extraOptions
	);
	$this->copyFields[$source][] = $definition;
}
|
||||
|
||||
function getCopyFieldDefinitions() {
|
||||
$xml = array();
|
||||
|
||||
@ -110,6 +188,12 @@ abstract class SolrIndex extends SearchIndex {
|
||||
$xml[] = "<copyField source='{$name}' dest='_text' />";
|
||||
}
|
||||
|
||||
foreach ($this->copyFields as $source => $fields) {
|
||||
foreach($fields as $fieldAttrs) {
|
||||
$xml[] = $this->toXmlTag('copyField', $fieldAttrs);
|
||||
}
|
||||
}
|
||||
|
||||
return implode("\n\t", $xml);
|
||||
}
|
||||
|
||||
|
21
docs/Solr.md
21
docs/Solr.md
@ -285,6 +285,27 @@ The searched term is highlighted with an `<em>` tag by default.
|
||||
Note: It is recommended to strip out all HTML tags and convert entities on the indexed content,
|
||||
to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
|
||||
|
||||
### Adding Analyzers, Tokenizers and Token Filters
|
||||
|
||||
When a document is indexed, its individual fields are subject to analyzing and tokenizing filters that can transform and normalize the data in the fields, for example by removing blank spaces, stripping HTML code, stemming, or replacing a particular character with another
|
||||
(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)).
|
||||
|
||||
Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad")
|
||||
|
||||
<?php
|
||||
class MyIndex extends SolrIndex {
|
||||
function init() {
|
||||
$this->addClass('Page');
|
||||
$this->addField('Content');
|
||||
$this->addAnalyzer('Content', 'filter', array('class' => 'solr.SynonymFilterFactory', 'synonyms' => 'syn.txt', 'ignoreCase' => 'true', 'expand' => 'false'));
|
||||
}
|
||||
}
|
||||
|
||||
// Generates the following XML schema definition:
|
||||
// <field name="Page_Content" ...>
|
||||
//     <analyzer><filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/></analyzer>
|
||||
// </field>
|
||||
|
||||
## Debugging
|
||||
|
||||
### Using the web admin interface
|
||||
|
@ -49,8 +49,51 @@ class SolrIndexTest extends SapphireTest {
|
||||
$this->assertEquals('2010-12-30T00:00:00Z', $value['value'], 'Writes non-NULL dates');
|
||||
}
|
||||
|
||||
protected function getServiceMock() {
|
||||
$serviceMock = Phockito::mock('SolrService');
|
||||
/**
 * Checks that extra options passed to addFilterField() override the
 * default attributes of the generated field definition.
 */
function testAddFieldExtraOptions() {
	$origMode = Director::get_environment_type();
	// Dev mode would force stored=true for everything
	Director::set_environment_type('live');

	try {
		$index = new SolrIndexTest_FakeIndex();

		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
		$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
		$this->assertEquals('false', (string)$defField1[0]['stored']);

		$index->addFilterField('Field1', null, array('stored' => 'true'));
		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
		$defField1 = $defs->xpath('field[@name="SearchUpdaterTest_Container_Field1"]');
		$this->assertEquals('true', (string)$defField1[0]['stored']);
	} catch(Exception $e) {
		// Restore the environment even when an assertion fails, so the
		// 'live' mode does not leak into subsequent tests
		Director::set_environment_type($origMode);
		throw $e;
	}

	Director::set_environment_type($origMode);
}
|
||||
|
||||
/**
 * Checks that a field definition has no <analyzer> element until
 * addAnalyzer() is called, and carries the registered component afterwards.
 */
function testAddAnalyzer() {
	$fieldQuery = 'field[@name="SearchUpdaterTest_Container_Field1"]';
	$fakeIndex = new SolrIndexTest_FakeIndex();

	// Before registration: no analyzer element on the field
	$schema = simplexml_load_string('<fields>' . $fakeIndex->getFieldDefinitions() . '</fields>');
	$matches = $schema->xpath($fieldQuery);
	$analyzerNodes = $matches[0]->analyzer;
	$this->assertFalse((bool)$analyzerNodes);

	$fakeIndex->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory'));

	// After registration: the analyzer element contains the charFilter
	$schema = simplexml_load_string('<fields>' . $fakeIndex->getFieldDefinitions() . '</fields>');
	$matches = $schema->xpath($fieldQuery);
	$analyzerNodes = $matches[0]->analyzer;
	$this->assertTrue((bool)$analyzerNodes);
	$this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzerNodes[0]->charFilter[0]['class']);
}
|
||||
|
||||
/**
 * Checks that a copy field registered through addCopyField() is emitted
 * as the last <copyField> definition with the given source and dest.
 */
function testAddCopyField() {
	$index = new SolrIndexTest_FakeIndex();
	$index->addCopyField('sourceField', 'destField');
	$defs = simplexml_load_string('<fields>' . $index->getCopyFieldDefinitions() . '</fields>');

	// array_pop() only works on arrays, not on a SimpleXMLElement, so the
	// <copyField> nodes are collected into a plain array first
	$copyFields = $defs->xpath('copyField');
	$this->assertTrue(count($copyFields) > 0, 'At least one copyField definition is generated');
	$lastDef = array_pop($copyFields);

	$this->assertEquals('sourceField', (string)$lastDef['source']);
	$this->assertEquals('destField', (string)$lastDef['dest']);
}
|
||||
|
||||
protected function getServiceSpy() {
|
||||
$serviceSpy = Phockito::spy('SolrService');
|
||||
$fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null));
|
||||
|
||||
Phockito::when($serviceMock)
|
||||
|
Loading…
x
Reference in New Issue
Block a user