mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 14:05:29 +02:00
Merge pull request #75 from tractorcow/spelling-suggestions
API Support custom files / spelling correction
This commit is contained in:
commit
931a1105a9
@ -96,6 +96,12 @@ class Solr {
|
||||
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
|
||||
static protected $service_core_singletons = array();
|
||||
|
||||
/**
|
||||
* Get a SolrService
|
||||
*
|
||||
* @param string $core Optional core name
|
||||
* @return SolrService_Core
|
||||
*/
|
||||
static function service($core = null) {
|
||||
$options = self::solr_options();
|
||||
|
||||
@ -143,58 +149,82 @@ class Solr {
|
||||
class Solr_Configure extends BuildTask {
|
||||
|
||||
/**
 * Upload the configuration of every index returned by Solr::get_indexes()
 * and create or reload the matching core.
 *
 * A failure while updating one index is logged and does not prevent the
 * remaining indexes from being processed.
 *
 * @param mixed $request Not read by this task
 */
public function run($request) {
	// Find the IndexStore handler, which will handle uploading config files to Solr
	$store = $this->getSolrConfigStore();

	foreach (Solr::get_indexes() as $instance) {
		try {
			$this->updateIndex($instance, $store);
		} catch(Exception $e) {
			// We got an exception. Warn, but continue to next index.
			$this->log("Failure: " . $e->getMessage());
		}
	}
}
|
||||
|
||||
/**
 * Push the configuration of a single index to the store, then make Solr
 * pick it up by creating the core or reloading it if it is already active.
 *
 * @param SolrIndex $instance Index whose configuration is uploaded
 * @param SolrConfigStore $store Store receiving the configuration files
 */
protected function updateIndex($instance, $store) {
	$name = $instance->getIndexName();
	$this->log("Configuring $name.");
	$this->log("Uploading configuration ... ");

	// Config files go up first so they are in place when the core is (re)loaded
	$instance->uploadConfig($store);

	$solr = Solr::service();
	if (!$solr->coreIsActive($name)) {
		// No active core under this name yet: create one at the store's instance dir
		$this->log("Creating core ...");
		$solr->coreCreate($name, $store->instanceDir($name));
	} else {
		$this->log("Reloading core ...");
		$solr->coreReload($name);
	}

	$this->log("Done");
}
|
||||
|
||||
/**
 * Build the config store selected by the 'indexstore' entry of the Solr options.
 *
 * The store's 'mode' picks the implementation: 'file', 'webdav', or the name
 * of a class implementing SolrConfigStore. A missing 'indexstore' entry or an
 * unrecognised mode is reported through user_error() with E_USER_ERROR.
 *
 * @return SolrConfigStore
 */
protected function getSolrConfigStore() {
	$options = Solr::solr_options();

	if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
		user_error('No index configuration for Solr provided', E_USER_ERROR);
	}

	// Find the IndexStore handler, which will handle uploading config files to Solr
	$mode = $indexstore['mode'];

	// Each branch constructs exactly one store and returns it directly
	if ($mode == 'file') {
		return new SolrConfigStore_File($indexstore);
	} elseif ($mode == 'webdav') {
		return new SolrConfigStore_WebDAV($indexstore);
	} elseif (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, 'SolrConfigStore')) {
		return new $mode($indexstore);
	} else {
		user_error('Unknown Solr index mode '.$indexstore['mode'], E_USER_ERROR);
	}
}
|
||||
|
||||
/**
 * Print a progress message and flush the output right away.
 *
 * When Director::is_cli() is true the message is followed by a newline;
 * otherwise it is XML-escaped and followed by a <br /> tag.
 *
 * @param string $message
 */
protected function log($message) {
	$line = Director::is_cli()
		? $message . "\n"
		: Convert::raw2xml($message) . "<br />";

	echo $line;
	flush();
}
|
||||
}
|
||||
|
||||
|
@ -141,6 +141,26 @@ abstract class SolrIndex extends SearchIndex {
|
||||
|
||||
return implode("\n\t\t", $xml);
|
||||
}
|
||||
|
||||
/**
 * Reduce a spellcheck collation to a single suggestion string.
 *
 * A string collation is returned unchanged. For an object exposing
 * misspellingsAndCorrections, the first value found when iterating that
 * property is returned. Anything else yields an empty string.
 *
 * @param mixed $collation
 * @return string
 */
protected function getCollatedSuggestion($collation = '') {
	if(is_string($collation)) {
		return $collation;
	}

	if(!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
		return '';
	}

	// Only the first entry is used; the loop exits on its first iteration
	foreach($collation->misspellingsAndCorrections as $correction) {
		return $correction;
	}

	return '';
}
|
||||
|
||||
/**
|
||||
* Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
|
||||
@ -472,14 +492,18 @@ abstract class SolrIndex extends SearchIndex {
|
||||
* @param SearchQuery $query
|
||||
* @param integer $offset
|
||||
* @param integer $limit
|
||||
* @param Array $params Extra request parameters passed through to Solr
|
||||
* @param array $params Extra request parameters passed through to Solr
|
||||
* @return ArrayData Map with the following keys:
|
||||
* - 'Matches': ArrayList of the matched object instances
|
||||
*/
|
||||
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array()) {
|
||||
$service = $this->getService();
|
||||
|
||||
SearchVariant::with(count($query->classes) == 1 ? $query->classes[0]['class'] : null)->call('alterQuery', $query, $this);
|
||||
|
||||
$searchClass = count($query->classes) == 1
|
||||
? $query->classes[0]['class']
|
||||
: null;
|
||||
SearchVariant::with($searchClass)
|
||||
->call('alterQuery', $query, $this);
|
||||
|
||||
$q = array(); // Query
|
||||
$fq = array(); // Filter query
|
||||
@ -665,15 +689,18 @@ abstract class SolrIndex extends SearchIndex {
|
||||
|
||||
// Suggestions. Requires spellcheck.collate=true in $params
|
||||
if(isset($res->spellcheck->suggestions->collation)) {
|
||||
// Extract string suggestion
|
||||
$suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);
|
||||
|
||||
// The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
|
||||
$ret['Suggestion'] = $res->spellcheck->suggestions->collation;
|
||||
$ret['Suggestion'] = $suggestion;
|
||||
|
||||
// A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
|
||||
$ret['SuggestionNice'] = $this->getNiceSuggestion($res->spellcheck->suggestions->collation);
|
||||
$ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);
|
||||
|
||||
// A string suitable for appending to an href as a query string.
|
||||
// For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
|
||||
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($res->spellcheck->suggestions->collation);
|
||||
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
|
||||
}
|
||||
}
|
||||
|
||||
@ -694,4 +721,25 @@ abstract class SolrIndex extends SearchIndex {
|
||||
$this->service = $service;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Upload config for this index to the given store
 *
 * Sends the generated schema as 'schema.xml', followed by every regular
 * file found directly inside the extras path.
 *
 * @param SolrConfigStore $store
 */
public function uploadConfig($store) {
	$index = $this->getIndexName();

	// Upload the config files for this index
	$store->uploadString(
		$index,
		'schema.xml',
		(string)$this->generateSchema()
	);

	// glob() can return false instead of an array; treat that as
	// "no extra files" rather than iterating a non-array
	$extras = glob($this->getExtrasPath().'/*');
	if (!is_array($extras)) {
		return;
	}

	// Upload additional files
	foreach ($extras as $file) {
		if (is_file($file)) {
			$store->uploadFile($index, $file);
		}
	}
}
|
||||
}
|
||||
|
@ -863,6 +863,10 @@
|
||||
<str>nameOfCustomComponent2</str>
|
||||
</arr>
|
||||
-->
|
||||
|
||||
<arr name="last-components">
|
||||
<str>spellcheck</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
<!-- A request handler that returns indented JSON by default -->
|
||||
@ -1254,7 +1258,31 @@
|
||||
<!-- a spellchecker built from a field of the main index -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">default</str>
|
||||
<str name="field">text</str>
|
||||
<str name="field">_text</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||
<str name="distanceMeasure">internal</str>
|
||||
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
|
||||
<float name="accuracy">0.5</float>
|
||||
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
|
||||
<int name="maxEdits">2</int>
|
||||
<!-- the minimum shared prefix when enumerating terms -->
|
||||
<int name="minPrefix">1</int>
|
||||
<!-- maximum number of inspections per result. -->
|
||||
<int name="maxInspections">5</int>
|
||||
<!-- minimum length of a query term to be considered for correction -->
|
||||
<int name="minQueryLength">4</int>
|
||||
<!-- maximum threshold of documents a query term can appear to be considered for correction -->
|
||||
<float name="maxQueryFrequency">0.01</float>
|
||||
<!-- uncomment this to require suggestions to occur in 1% of the documents
|
||||
<float name="thresholdTokenFrequency">.01</float>
|
||||
-->
|
||||
</lst>
|
||||
|
||||
<!-- Custom spellcheck dictionary limited to an optional _spellcheckText field -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">_spellcheck</str>
|
||||
<str name="field">_spellcheckText</str>
|
||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||
<str name="distanceMeasure">internal</str>
|
||||
@ -1279,7 +1307,7 @@
|
||||
<lst name="spellchecker">
|
||||
<str name="name">wordbreak</str>
|
||||
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
||||
<str name="field">name</str>
|
||||
<str name="field">_text</str>
|
||||
<str name="combineWords">true</str>
|
||||
<str name="breakWords">true</str>
|
||||
<int name="maxChanges">10</int>
|
||||
|
Loading…
Reference in New Issue
Block a user