mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 12:05:29 +00:00
ENHANCEMENT Per-field analyser support for SolrIndex
This commit is contained in:
parent
b6fd7b7b89
commit
0ef78f905c
@ -24,6 +24,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||
|
||||
static $sortTypeMap = array();
|
||||
|
||||
protected $analyzerFields = array();
|
||||
|
||||
/**
 * Renders the schema.ss template that ships in the fulltextsearch
 * module's conf/templates folder, using this index as the data source.
 *
 * @return mixed Whatever renderWith() returns for the schema template
 */
function generateSchema() {
	$schemaTemplate = Director::baseFolder() . '/fulltextsearch/conf/templates/schema.ss';

	return $this->renderWith($schemaTemplate);
}
|
||||
@ -36,6 +38,24 @@ abstract class SolrIndex extends SearchIndex {
|
||||
return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/types.ss');
|
||||
}
|
||||
|
||||
/**
 * Registers an index-time analyzer component for a specific field.
 * Typical uses are removing HTML tags or applying stemming.
 *
 * The given field name is expanded through fieldData(); the parameters are
 * stored for every full field name it returns, keyed by $type. Calling this
 * again with the same field and type replaces the earlier parameters.
 * Nothing is stored when fieldData() returns an empty result.
 *
 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
 *
 * @param String $field Field name as understood by fieldData()
 * @param String $type Tag name rendered inside the field's <analyzer> element, e.g. "charFilter"
 * @param Array $params Parameters for the analyzer, usually at least a "class"
 */
function addAnalyzer($field, $type, $params) {
	$resolved = $this->fieldData($field);
	if(!$resolved) return;

	// Only the full field names (keys) matter here; the spec values are not used.
	foreach($resolved as $fullName => $unusedSpec) {
		if(!isset($this->analyzerFields[$fullName])) {
			$this->analyzerFields[$fullName] = array();
		}
		$this->analyzerFields[$fullName][$type] = $params;
	}
}
|
||||
|
||||
function getFieldDefinitions() {
|
||||
$xml = array();
|
||||
$stored = Director::isDev() ? "stored='true'" : "stored='false'";
|
||||
@ -84,6 +104,13 @@ abstract class SolrIndex extends SearchIndex {
|
||||
$multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : '';
|
||||
$type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];
|
||||
|
||||
$analyzerXml = '';
|
||||
if(isset($this->analyzerFields[$name])) {
|
||||
foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
|
||||
$analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
|
||||
}
|
||||
}
|
||||
|
||||
$fieldParams = array_merge(
|
||||
array(
|
||||
'name' => $name,
|
||||
@ -97,7 +124,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||
|
||||
return $this->toXmlTag(
|
||||
"field",
|
||||
$fieldParams
|
||||
$fieldParams,
|
||||
$analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null
|
||||
);
|
||||
}
|
||||
|
||||
|
23
docs/Solr.md
23
docs/Solr.md
@ -76,6 +76,27 @@ You can also copy the `thirdparty/`solr directory somewhere else,
|
||||
just set the path value in `mysite/_config.php` to point to the new location.
|
||||
And of course run `java -jar start.jar` from the new directory.
|
||||
|
||||
### Adding Analyzers, Tokenizers and Token Filters
|
||||
|
||||
When a document is indexed, its individual fields pass through analyzers, tokenizers and token filters that can transform and normalize the field data. Examples include removing blank spaces, stripping HTML, stemming, or replacing one character with another
|
||||
(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)).
|
||||
|
||||
Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad")
|
||||
|
||||
<?php
|
||||
class MyIndex extends SolrIndex {
	function init() {
		$this->addClass('Page');
		$this->addField('Content');
		// All entries of the params array are handed to the generated tag,
		// so the synonym file and matching options are configured here too.
		$this->addAnalyzer('Content', 'filter', array(
			'class' => 'solr.SynonymFilterFactory',
			'synonyms' => 'syn.txt',
			'ignoreCase' => 'true',
			'expand' => 'false'
		));
	}
}
|
||||
|
||||
// Generates the following XML schema definition:
|
||||
// <field name="Page_Content" ...>
|
||||
//     <analyzer>
//         <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="false"/>
//     </analyzer>
|
||||
// </field>
|
||||
|
||||
## Debugging
|
||||
|
||||
### Using the web admin interface
|
||||
@ -103,4 +124,4 @@ In order to query the field, reverse the search conditions and exclude the range
|
||||
// Wrong: Filter will ignore all empty field values
|
||||
$myQuery->filter(<field>, new SearchQuery_Range('*', <date>));
|
||||
// Better: Exclude the opposite range
|
||||
$myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
|
||||
$myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
|
||||
|
@ -65,6 +65,23 @@ class SolrIndexTest extends SapphireTest {
|
||||
|
||||
Director::set_environment_type($origMode);
|
||||
}
|
||||
|
||||
/**
 * Verifies that a field has no <analyzer> element by default and that
 * addAnalyzer() causes one to be rendered, containing the registered
 * charFilter with its "class" attribute.
 */
function testAddAnalyzer() {
	$fieldQuery = 'field[@name="SearchUpdaterTest_Container_Field1"]';
	$index = new SolrIndexTest_FakeIndex();

	// Before registering anything: the field definition carries no analyzer.
	$schema = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
	$matches = $schema->xpath($fieldQuery);
	$analyzerNodes = $matches[0]->analyzer;
	$this->assertFalse((bool)$analyzerNodes);

	// After registering a charFilter: the analyzer element exists and wraps it.
	$index->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory'));
	$schema = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
	$matches = $schema->xpath($fieldQuery);
	$analyzerNodes = $matches[0]->analyzer;
	$this->assertTrue((bool)$analyzerNodes);
	$this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzerNodes[0]->charFilter[0]['class']);
}
|
||||
|
||||
protected function getServiceMock() {
|
||||
$serviceMock = Phockito::mock('SolrService');
|
||||
$fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null));
|
||||
|
Loading…
x
Reference in New Issue
Block a user