diff --git a/code/solr/SolrIndex.php b/code/solr/SolrIndex.php
index 68d4f0f..0dffeee 100644
--- a/code/solr/SolrIndex.php
+++ b/code/solr/SolrIndex.php
@@ -24,6 +24,8 @@ abstract class SolrIndex extends SearchIndex {
static $sortTypeMap = array();
+ protected $analyzerFields = array();
+
/**
 * Renders this index through the schema.ss template shipped in the
 * fulltextsearch module's conf/templates folder.
 *
 * @return mixed The result of renderWith() for that template
 */
function generateSchema() {
	$schemaTemplate = Director::baseFolder() . '/fulltextsearch/conf/templates/schema.ss';
	return $this->renderWith($schemaTemplate);
}
@@ -36,6 +38,24 @@ abstract class SolrIndex extends SearchIndex {
return $this->renderWith(Director::baseFolder() . '/fulltextsearch/conf/templates/types.ss');
}
+	/**
+	 * Registers an index-time analyzer component for a field of this index.
+	 * Typical uses are stripping HTML tags or applying stemming.
+	 *
+	 * The field name is resolved through fieldData(); the parameters are stored for
+	 * every full field name it returns, keyed by $type. Registering the same $type
+	 * again for a field replaces the earlier parameters. Nothing is stored when
+	 * fieldData() yields no match.
+	 *
+	 * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
+	 *
+	 * @param String $field Field name, as understood by fieldData()
+	 * @param String $type Kind of analyzer component, e.g. "filter" or "charFilter"
+	 * @param Array $params Parameters for the component, usually at least a "class"
+	 */
+	function addAnalyzer($field, $type, $params) {
+		$resolved = $this->fieldData($field);
+		if(!$resolved) return;
+
+		foreach($resolved as $fullName => $unusedSpec) {
+			// Start from whatever is already registered for this field, if anything.
+			$registered = isset($this->analyzerFields[$fullName])
+				? $this->analyzerFields[$fullName]
+				: array();
+			$registered[$type] = $params;
+			$this->analyzerFields[$fullName] = $registered;
+		}
+	}
+
function getFieldDefinitions() {
$xml = array();
$stored = Director::isDev() ? "stored='true'" : "stored='false'";
@@ -84,6 +104,13 @@ abstract class SolrIndex extends SearchIndex {
$multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : '';
$type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];
+ $analyzerXml = '';
+ if(isset($this->analyzerFields[$name])) {
+ foreach($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
+ $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
+ }
+ }
+
$fieldParams = array_merge(
array(
'name' => $name,
@@ -97,7 +124,8 @@ abstract class SolrIndex extends SearchIndex {
return $this->toXmlTag(
"field",
- $fieldParams
+ $fieldParams,
+ $analyzerXml ? "$analyzerXml" : null
);
}
diff --git a/docs/Solr.md b/docs/Solr.md
index 088615d..de6ad79 100644
--- a/docs/Solr.md
+++ b/docs/Solr.md
@@ -76,6 +76,27 @@ You can also copy the `thirdparty/`solr directory somewhere else,
just set the path value in `mysite/_config.php` to point to the new location.
And of course run `java -jar start.jar` from the new directory.
+### Adding Analyzers, Tokenizers and Token Filters
+
+When a document is indexed, each of its fields passes through analyzing and tokenizing filters that can transform and normalize the field data: for example, removing blank spaces, stripping HTML code, stemming, or replacing one character with another
+(see [Solr Wiki](http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters)).
+
+Example: Replace synonyms on indexing (e.g. "i-pad" with "iPad")
+
+	<?php
+	class MySolrSearchIndex extends SolrIndex {
+		function init() {
+			$this->addClass('Page');
+ $this->addField('Content');
+ $this->addAnalyzer('Content', 'filter', array('class' => 'solr.SynonymFilterFactory'));
+ }
+ }
+
+ // Generates the following XML schema definition:
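+	// <field name='Page_Content' ...>
+	//   <analyzer><filter class='solr.SynonymFilterFactory' /></analyzer>
+	// </field>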
+
## Debugging
### Using the web admin interface
@@ -103,4 +124,4 @@ In order to query the field, reverse the search conditions and exclude the range
// Wrong: Filter will ignore all empty field values
$myQuery->filter(<field>, new SearchQuery_Range('*', <date>));
// Better: Exclude the opposite range
- $myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
\ No newline at end of file
+ $myQuery->exclude(<field>, new SearchQuery_Range(<date>, '*'));
diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php
index afb8f2d..411b17b 100644
--- a/tests/SolrIndexTest.php
+++ b/tests/SolrIndexTest.php
@@ -65,6 +65,23 @@ class SolrIndexTest extends SapphireTest {
Director::set_environment_type($origMode);
}
+
+	/**
+	 * Checks that addAnalyzer() makes getFieldDefinitions() emit an analyzer for the
+	 * targeted field, and that the field has no analyzer before one is added.
+	 */
+	function testAddAnalyzer() {
+		$index = new SolrIndexTest_FakeIndex();
+		$fieldXpath = 'field[@name="SearchUpdaterTest_Container_Field1"]';
+
+		// The definitions are wrapped in a single root element: it keeps the string
+		// parseable as one XML document and gives the relative "field[...]" XPath a
+		// parent node to resolve against.
+		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
+		$defField1 = $defs->xpath($fieldXpath);
+		$analyzers = $defField1[0]->analyzer;
+		$this->assertFalse((bool)$analyzers);
+
+		$index->addAnalyzer('Field1', 'charFilter', array('class' => 'solr.HTMLStripCharFilterFactory'));
+		$defs = simplexml_load_string('<fields>' . $index->getFieldDefinitions() . '</fields>');
+		$defField1 = $defs->xpath($fieldXpath);
+		$analyzers = $defField1[0]->analyzer;
+		$this->assertTrue((bool)$analyzers);
+		$this->assertEquals('solr.HTMLStripCharFilterFactory', $analyzers[0]->charFilter[0]['class']);
+	}
+
protected function getServiceMock() {
$serviceMock = Phockito::mock('SolrService');
$fakeResponse = new Apache_Solr_Response(new Apache_Solr_HttpTransport_Response(null, null, null));