Merge pull request #75 from tractorcow/spelling-suggestions

API Support custom files / spelling correction
This commit is contained in:
Hamish Friedlander 2015-07-01 14:52:37 +12:00
commit 931a1105a9
3 changed files with 151 additions and 45 deletions

View File

@ -96,6 +96,12 @@ class Solr {
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
static protected $service_core_singletons = array();
/**
* Get a SolrService
*
* @param string $core Optional core name
* @return SolrService_Core
*/
static function service($core = null) {
$options = self::solr_options();
@ -143,58 +149,82 @@ class Solr {
class Solr_Configure extends BuildTask {
/**
 * Upload configuration for every index returned by Solr::get_indexes().
 *
 * A failure on one index is logged and does not stop the remaining indexes
 * from being processed.
 *
 * @param mixed $request Not read by this method
 */
public function run($request) {
    // Find the IndexStore handler, which will handle uploading config files to Solr
    $store = $this->getSolrConfigStore();

    // The Solr service and options are looked up where they are needed
    // (updateIndex() / getSolrConfigStore()), so no copies are held here.
    foreach (Solr::get_indexes() as $instance) {
        try {
            $this->updateIndex($instance, $store);
        } catch (Exception $e) {
            // We got an exception. Warn, but continue to next index.
            $this->log("Failure: " . $e->getMessage());
        }
    }
}
/**
 * Push one index's configuration to the store, then make Solr pick it up.
 *
 * An already-active core is reloaded; otherwise a new core is created
 * from the store's instance directory for that index.
 *
 * @param SolrIndex $instance Index to configure
 * @param SolrConfigStore $store Store receiving the config files
 */
protected function updateIndex($instance, $store) {
    $index = $instance->getIndexName();
    $this->log("Configuring $index.");

    // Step 1: ship the config files for this index to the store
    $this->log("Uploading configuration ... ");
    $instance->uploadConfig($store);

    // Step 2: point Solr at the freshly uploaded files
    $solr = Solr::service();
    if (!$solr->coreIsActive($index)) {
        $this->log("Creating core ...");
        $solr->coreCreate($index, $store->instanceDir($index));
    } else {
        $this->log("Reloading core ...");
        $solr->coreReload($index);
    }

    $this->log("Done");
}
/**
 * Get config store
 *
 * Builds the store selected by the 'indexstore' entry of Solr::solr_options().
 * Its 'mode' key picks the implementation: 'file', 'webdav', or the name of a
 * class implementing SolrConfigStore. A missing 'indexstore' entry or an
 * unrecognised mode is reported through user_error() with E_USER_ERROR.
 *
 * @return SolrConfigStore
 */
protected function getSolrConfigStore() {
    $options = Solr::solr_options();
    if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
        user_error('No index configuration for Solr provided', E_USER_ERROR);
    }

    // Find the IndexStore handler, which will handle uploading config files to Solr.
    // Each branch returns the store directly; the per-index upload loop lives in
    // run() / updateIndex(), not here.
    $mode = $indexstore['mode'];
    if ($mode == 'file') {
        return new SolrConfigStore_File($indexstore);
    } elseif ($mode == 'webdav') {
        return new SolrConfigStore_WebDAV($indexstore);
    } elseif (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, 'SolrConfigStore')) {
        return new $mode($indexstore);
    } else {
        user_error('Unknown Solr index mode '.$indexstore['mode'], E_USER_ERROR);
    }
}
/**
 * Print a progress message and flush the output.
 *
 * When Director::is_cli() is true the message is followed by a newline;
 * otherwise it is passed through Convert::raw2xml() and followed by a
 * <br /> tag.
 *
 * @param string $message
 */
protected function log($message) {
    $line = Director::is_cli()
        ? $message . "\n"
        : Convert::raw2xml($message) . "<br />";
    echo $line;
    flush();
}
}

View File

@ -141,6 +141,26 @@ abstract class SolrIndex extends SearchIndex {
return implode("\n\t\t", $xml);
}
/**
 * Reduce a spellcheck collation value to a single suggestion string.
 *
 * A string is returned unchanged. For an object exposing
 * misspellingsAndCorrections, the first value of that collection is
 * returned. Every other input yields an empty string.
 *
 * @param mixed $collation
 * @return string
 */
protected function getCollatedSuggestion($collation = '') {
    // A plain string is already the suggestion
    if (is_string($collation)) {
        return $collation;
    }

    // Only objects carrying a corrections collection can provide one
    if (!is_object($collation) || !isset($collation->misspellingsAndCorrections)) {
        return '';
    }

    // Return on the first pass: only the first correction is wanted
    foreach ($collation->misspellingsAndCorrections as $correction) {
        return $correction;
    }

    // Corrections collection was empty
    return '';
}
/**
* Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
@ -472,14 +492,18 @@ abstract class SolrIndex extends SearchIndex {
* @param SearchQuery $query
* @param integer $offset
* @param integer $limit
* @param array $params Extra request parameters passed through to Solr
* @return ArrayData Map with the following keys:
* - 'Matches': ArrayList of the matched object instances
*/
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array()) {
$service = $this->getService();
SearchVariant::with(count($query->classes) == 1 ? $query->classes[0]['class'] : null)->call('alterQuery', $query, $this);
$searchClass = count($query->classes) == 1
? $query->classes[0]['class']
: null;
SearchVariant::with($searchClass)
->call('alterQuery', $query, $this);
$q = array(); // Query
$fq = array(); // Filter query
@ -665,15 +689,18 @@ abstract class SolrIndex extends SearchIndex {
// Suggestions. Requires spellcheck.collate=true in $params
if(isset($res->spellcheck->suggestions->collation)) {
// Extract string suggestion
$suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);
// The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
$ret['Suggestion'] = $res->spellcheck->suggestions->collation;
$ret['Suggestion'] = $suggestion;
// A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
$ret['SuggestionNice'] = $this->getNiceSuggestion($res->spellcheck->suggestions->collation);
$ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);
// A string suitable for appending to an href as a query string.
// For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($res->spellcheck->suggestions->collation);
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
}
}
@ -694,4 +721,25 @@ abstract class SolrIndex extends SearchIndex {
$this->service = $service;
return $this;
}
/**
 * Upload config for this index to the given store
 *
 * Sends the generated schema as schema.xml, then every regular file found
 * directly inside the extras path.
 *
 * @param SolrConfigStore $store
 */
public function uploadConfig($store) {
    $index = $this->getIndexName();

    // Upload the config files for this index
    $store->uploadString($index, 'schema.xml', (string)$this->generateSchema());

    // Upload additional files. glob() returns false rather than an array on
    // error, so stop here instead of handing a non-array to foreach.
    $extras = glob($this->getExtrasPath().'/*');
    if (!$extras) {
        return;
    }

    foreach ($extras as $file) {
        if (is_file($file)) {
            $store->uploadFile($index, $file);
        }
    }
}
}

View File

@ -863,6 +863,10 @@
<str>nameOfCustomComponent2</str>
</arr>
-->
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<!-- A request handler that returns indented JSON by default -->
@ -1254,7 +1258,31 @@
<!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker">
<str name="name">default</str>
<!-- NOTE(review): two "field" entries follow ("text" and "_text"). This looks like a leftover from a rename; presumably only one should remain. Confirm which field the schema defines. -->
<str name="field">text</str>
<str name="field">_text</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
<float name="accuracy">0.5</float>
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
<int name="maxEdits">2</int>
<!-- the minimum shared prefix when enumerating terms -->
<int name="minPrefix">1</int>
<!-- maximum number of inspections per result. -->
<int name="maxInspections">5</int>
<!-- minimum length of a query term to be considered for correction -->
<int name="minQueryLength">4</int>
<!-- maximum threshold of documents a query term can appear in to be considered for correction -->
<float name="maxQueryFrequency">0.01</float>
<!-- uncomment this to require suggestions to occur in 1% of the documents
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>
<!-- Custom spellcheck dictionary limited to an optional _spellcheckText field -->
<lst name="spellchecker">
<str name="name">_spellcheck</str>
<str name="field">_spellcheckText</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
@ -1279,7 +1307,7 @@
<lst name="spellchecker">
<str name="name">wordbreak</str>
<str name="classname">solr.WordBreakSolrSpellChecker</str>
<str name="field">name</str>
<str name="field">_text</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<int name="maxChanges">10</int>