API Additional support for custom copy_fields

API Additional textSpellHtml type for html-safe spelling database generation
This commit is contained in:
Damian Mooyman 2015-07-06 17:12:52 +12:00
parent 931a1105a9
commit ddb0b2520a
3 changed files with 48 additions and 5 deletions

View File

@ -46,6 +46,14 @@ abstract class SolrIndex extends SearchIndex {
* @config
*/
private static $default_field = '_text';
/**
* List of copy fields that all fulltext fields should be copied into.
* This will fall back to default_field if not specified.
*
* @var array
*/
private static $copy_fields = array();
/**
* @return String Absolute path to the folder containing
@ -104,6 +112,22 @@ abstract class SolrIndex extends SearchIndex {
return $this->config()->default_field;
}
/**
 * Resolve the destination fields that each fulltext field is copied into.
 *
 * The `copy_fields` config value is returned as-is when it is non-empty;
 * otherwise the single default field is used as the only destination.
 *
 * @return array
 */
protected function getCopyDestinations() {
	$configured = $this->config()->copy_fields;
	// Nothing configured explicitly: fall back to the default field only
	if(!$configured) {
		return array($this->getDefaultField());
	}
	return $configured;
}
public function getFieldDefinitions() {
$xml = array();
$stored = $this->getStoredDefault();
@ -365,14 +389,22 @@ abstract class SolrIndex extends SearchIndex {
);
}
function getCopyFieldDefinitions() {
/**
* Generate XML for copy field definitions
*
* @return string
*/
public function getCopyFieldDefinitions() {
$xml = array();
$df = $this->getDefaultField();
foreach ($this->fulltextFields as $name => $field) {
$xml[] = "<copyField source='{$name}' dest='{$df}' />";
// Default copy fields
foreach($this->getCopyDestinations() as $copyTo) {
foreach ($this->fulltextFields as $name => $field) {
$xml[] = "<copyField source='{$name}' dest='{$copyTo}' />";
}
}
// Explicit copy fields
foreach ($this->copyFields as $source => $fields) {
foreach($fields as $fieldAttrs) {
$xml[] = $this->toXmlTag('copyField', $fieldAttrs);

View File

@ -203,6 +203,17 @@
</analyzer>
</fieldType>
<!-- Text optimized for spelling corrections, with minimal alterations (e.g. no stemming) but also html filtering -->
<!-- Field type "textSpellHtml": a solr.TextField with one analyzer; the stages below are declared in the order char filter, tokenizer, token filters -->
<fieldType name="textSpellHtml" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<!-- Strip HTML markup from the raw value before it is tokenized -->
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<!-- Split the remaining text into tokens with the standard tokenizer -->
<tokenizer class="solr.StandardTokenizerFactory" />
<!-- Drop the words listed in stopwords.txt, ignoring case when matching -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<!-- Keep only tokens whose length is between 4 and 20 characters -->
<filter class="solr.LengthFilterFactory" min="4" max="20" />
<!-- Lower-case the surviving tokens -->
<filter class="solr.LowerCaseFilterFactory" />
<!-- Discard duplicate tokens (per Solr docs: duplicates at the same position) -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
</fieldType>
<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">

View File

@ -183,7 +183,7 @@ This can be fixed by aggregating spell checking data in a separate
$xml = parent::getFieldDefinitions();
$xml .= "\n\n\t\t<!-- Additional custom fields for spell checking -->";
$xml .= "\n\t\t<field name='spellcheckData' type='textSpell' indexed='true' stored='false' multiValued='true' />";
$xml .= "\n\t\t<field name='spellcheckData' type='textSpellHtml' indexed='true' stored='false' multiValued='true' />";
return $xml;
}