mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 14:05:29 +02:00
Merge pull request #75 from tractorcow/spelling-suggestions
API Support custom files / spelling correction
This commit is contained in:
commit
931a1105a9
@ -96,6 +96,12 @@ class Solr {
|
|||||||
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
|
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */
|
||||||
static protected $service_core_singletons = array();
|
static protected $service_core_singletons = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a SolrService
|
||||||
|
*
|
||||||
|
* @param string $core Optional core name
|
||||||
|
* @return SolrService_Core
|
||||||
|
*/
|
||||||
static function service($core = null) {
|
static function service($core = null) {
|
||||||
$options = self::solr_options();
|
$options = self::solr_options();
|
||||||
|
|
||||||
@ -143,8 +149,54 @@ class Solr {
|
|||||||
class Solr_Configure extends BuildTask {
|
class Solr_Configure extends BuildTask {
|
||||||
|
|
||||||
public function run($request) {
|
public function run($request) {
|
||||||
$service = Solr::service();
|
// Find the IndexStore handler, which will handle uploading config files to Solr
|
||||||
|
$store = $this->getSolrConfigStore();
|
||||||
$indexes = Solr::get_indexes();
|
$indexes = Solr::get_indexes();
|
||||||
|
foreach ($indexes as $instance) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
$this->updateIndex($instance, $store);
|
||||||
|
} catch(Exception $e) {
|
||||||
|
// We got an exception. Warn, but continue to next index.
|
||||||
|
$this->log("Failure: " . $e->getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the index on the given store
|
||||||
|
*
|
||||||
|
* @param SolrIndex $instance Instance
|
||||||
|
* @param SolrConfigStore $store
|
||||||
|
*/
|
||||||
|
protected function updateIndex($instance, $store) {
|
||||||
|
$index = $instance->getIndexName();
|
||||||
|
$this->log("Configuring $index.");
|
||||||
|
$this->log("Uploading configuration ... ");
|
||||||
|
|
||||||
|
|
||||||
|
// Upload the config files for this index
|
||||||
|
$instance->uploadConfig($store);
|
||||||
|
|
||||||
|
// Then tell Solr to use those config files
|
||||||
|
$service = Solr::service();
|
||||||
|
if ($service->coreIsActive($index)) {
|
||||||
|
$this->log("Reloading core ...");
|
||||||
|
$service->coreReload($index);
|
||||||
|
} else {
|
||||||
|
$this->log("Creating core ...");
|
||||||
|
$service->coreCreate($index, $store->instanceDir($index));
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->log("Done");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get config store
|
||||||
|
*
|
||||||
|
* @return SolrConfigStore
|
||||||
|
*/
|
||||||
|
protected function getSolrConfigStore() {
|
||||||
$options = Solr::solr_options();
|
$options = Solr::solr_options();
|
||||||
|
|
||||||
if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
|
if (!isset($options['indexstore']) || !($indexstore = $options['indexstore'])) {
|
||||||
@ -155,46 +207,24 @@ class Solr_Configure extends BuildTask {
|
|||||||
$mode = $indexstore['mode'];
|
$mode = $indexstore['mode'];
|
||||||
|
|
||||||
if ($mode == 'file') {
|
if ($mode == 'file') {
|
||||||
$store = new SolrConfigStore_File($indexstore);
|
return new SolrConfigStore_File($indexstore);
|
||||||
} elseif ($mode == 'webdav') {
|
} elseif ($mode == 'webdav') {
|
||||||
$store = new SolrConfigStore_WebDAV($indexstore);
|
return new SolrConfigStore_WebDAV($indexstore);
|
||||||
} elseif (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, 'SolrConfigStore')) {
|
} elseif (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, 'SolrConfigStore')) {
|
||||||
$store = new $mode($indexstore);
|
return new $mode($indexstore);
|
||||||
} else {
|
} else {
|
||||||
user_error('Unknown Solr index mode '.$indexstore['mode'], E_USER_ERROR);
|
user_error('Unknown Solr index mode '.$indexstore['mode'], E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($indexes as $instance) {
|
|
||||||
$index = $instance->getIndexName();
|
|
||||||
echo "Configuring $index. \n"; flush();
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Upload the config files for this index
|
|
||||||
echo "Uploading configuration ... \n"; flush();
|
|
||||||
|
|
||||||
$store->uploadString($index, 'schema.xml', (string)$instance->generateSchema());
|
|
||||||
|
|
||||||
foreach (glob($instance->getExtrasPath().'/*') as $file) {
|
|
||||||
if (is_file($file)) $store->uploadFile($index, $file);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then tell Solr to use those config files
|
protected function log($message) {
|
||||||
if ($service->coreIsActive($index)) {
|
if(Director::is_cli()) {
|
||||||
echo "Reloading core ... \n";
|
echo $message . "\n";
|
||||||
$service->coreReload($index);
|
|
||||||
} else {
|
} else {
|
||||||
echo "Creating core ... \n";
|
echo Convert::raw2xml($message) . "<br />";
|
||||||
$service->coreCreate($index, $store->instanceDir($index));
|
|
||||||
}
|
|
||||||
|
|
||||||
// And done
|
|
||||||
echo "Done\n";
|
|
||||||
|
|
||||||
} catch(Exception $e) {
|
|
||||||
// We got an exception. Warn, but continue to next index.
|
|
||||||
echo "Failure: " . $e->getMessage() . "\n"; flush();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,6 +142,26 @@ abstract class SolrIndex extends SearchIndex {
|
|||||||
return implode("\n\t\t", $xml);
|
return implode("\n\t\t", $xml);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract first suggestion text from collated values
|
||||||
|
*
|
||||||
|
* @param mixed $collation
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
protected function getCollatedSuggestion($collation = '') {
|
||||||
|
if(is_string($collation)) {
|
||||||
|
return $collation;
|
||||||
|
}
|
||||||
|
if(is_object($collation)) {
|
||||||
|
if(isset($collation->misspellingsAndCorrections)) {
|
||||||
|
foreach($collation->misspellingsAndCorrections as $key => $value) {
|
||||||
|
return $value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
|
* Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
|
||||||
* @param String $collation
|
* @param String $collation
|
||||||
@ -472,14 +492,18 @@ abstract class SolrIndex extends SearchIndex {
|
|||||||
* @param SearchQuery $query
|
* @param SearchQuery $query
|
||||||
* @param integer $offset
|
* @param integer $offset
|
||||||
* @param integer $limit
|
* @param integer $limit
|
||||||
* @param Array $params Extra request parameters passed through to Solr
|
* @param array $params Extra request parameters passed through to Solr
|
||||||
* @return ArrayData Map with the following keys:
|
* @return ArrayData Map with the following keys:
|
||||||
* - 'Matches': ArrayList of the matched object instances
|
* - 'Matches': ArrayList of the matched object instances
|
||||||
*/
|
*/
|
||||||
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array()) {
|
public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array()) {
|
||||||
$service = $this->getService();
|
$service = $this->getService();
|
||||||
|
|
||||||
SearchVariant::with(count($query->classes) == 1 ? $query->classes[0]['class'] : null)->call('alterQuery', $query, $this);
|
$searchClass = count($query->classes) == 1
|
||||||
|
? $query->classes[0]['class']
|
||||||
|
: null;
|
||||||
|
SearchVariant::with($searchClass)
|
||||||
|
->call('alterQuery', $query, $this);
|
||||||
|
|
||||||
$q = array(); // Query
|
$q = array(); // Query
|
||||||
$fq = array(); // Filter query
|
$fq = array(); // Filter query
|
||||||
@ -665,15 +689,18 @@ abstract class SolrIndex extends SearchIndex {
|
|||||||
|
|
||||||
// Suggestions. Requires spellcheck.collate=true in $params
|
// Suggestions. Requires spellcheck.collate=true in $params
|
||||||
if(isset($res->spellcheck->suggestions->collation)) {
|
if(isset($res->spellcheck->suggestions->collation)) {
|
||||||
|
// Extract string suggestion
|
||||||
|
$suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);
|
||||||
|
|
||||||
// The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
|
// The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
|
||||||
$ret['Suggestion'] = $res->spellcheck->suggestions->collation;
|
$ret['Suggestion'] = $suggestion;
|
||||||
|
|
||||||
// A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
|
// A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
|
||||||
$ret['SuggestionNice'] = $this->getNiceSuggestion($res->spellcheck->suggestions->collation);
|
$ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);
|
||||||
|
|
||||||
// A string suitable for appending to an href as a query string.
|
// A string suitable for appending to an href as a query string.
|
||||||
// For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
|
// For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
|
||||||
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($res->spellcheck->suggestions->collation);
|
$ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -694,4 +721,25 @@ abstract class SolrIndex extends SearchIndex {
|
|||||||
$this->service = $service;
|
$this->service = $service;
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Upload config for this index to the given store
|
||||||
|
*
|
||||||
|
* @param SolrConfigStore $store
|
||||||
|
*/
|
||||||
|
public function uploadConfig($store) {
|
||||||
|
// Upload the config files for this index
|
||||||
|
$store->uploadString(
|
||||||
|
$this->getIndexName(),
|
||||||
|
'schema.xml',
|
||||||
|
(string)$this->generateSchema()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Upload additional files
|
||||||
|
foreach (glob($this->getExtrasPath().'/*') as $file) {
|
||||||
|
if (is_file($file)) {
|
||||||
|
$store->uploadFile($this->getIndexName(), $file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -863,6 +863,10 @@
|
|||||||
<str>nameOfCustomComponent2</str>
|
<str>nameOfCustomComponent2</str>
|
||||||
</arr>
|
</arr>
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
<arr name="last-components">
|
||||||
|
<str>spellcheck</str>
|
||||||
|
</arr>
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
|
|
||||||
<!-- A request handler that returns indented JSON by default -->
|
<!-- A request handler that returns indented JSON by default -->
|
||||||
@ -1254,7 +1258,31 @@
|
|||||||
<!-- a spellchecker built from a field of the main index -->
|
<!-- a spellchecker built from a field of the main index -->
|
||||||
<lst name="spellchecker">
|
<lst name="spellchecker">
|
||||||
<str name="name">default</str>
|
<str name="name">default</str>
|
||||||
<str name="field">text</str>
|
<str name="field">_text</str>
|
||||||
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
|
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||||
|
<str name="distanceMeasure">internal</str>
|
||||||
|
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
|
||||||
|
<float name="accuracy">0.5</float>
|
||||||
|
<!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
|
||||||
|
<int name="maxEdits">2</int>
|
||||||
|
<!-- the minimum shared prefix when enumerating terms -->
|
||||||
|
<int name="minPrefix">1</int>
|
||||||
|
<!-- maximum number of inspections per result. -->
|
||||||
|
<int name="maxInspections">5</int>
|
||||||
|
<!-- minimum length of a query term to be considered for correction -->
|
||||||
|
<int name="minQueryLength">4</int>
|
||||||
|
<!-- maximum threshold of documents a query term can appear to be considered for correction -->
|
||||||
|
<float name="maxQueryFrequency">0.01</float>
|
||||||
|
<!-- uncomment this to require suggestions to occur in 1% of the documents
|
||||||
|
<float name="thresholdTokenFrequency">.01</float>
|
||||||
|
-->
|
||||||
|
</lst>
|
||||||
|
|
||||||
|
<!-- Custom spellcheck dictionary limited to an optional _spellcheckText field -->
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">_spellcheck</str>
|
||||||
|
<str name="field">_spellcheckText</str>
|
||||||
<str name="classname">solr.DirectSolrSpellChecker</str>
|
<str name="classname">solr.DirectSolrSpellChecker</str>
|
||||||
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
|
||||||
<str name="distanceMeasure">internal</str>
|
<str name="distanceMeasure">internal</str>
|
||||||
@ -1279,7 +1307,7 @@
|
|||||||
<lst name="spellchecker">
|
<lst name="spellchecker">
|
||||||
<str name="name">wordbreak</str>
|
<str name="name">wordbreak</str>
|
||||||
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
<str name="classname">solr.WordBreakSolrSpellChecker</str>
|
||||||
<str name="field">name</str>
|
<str name="field">_text</str>
|
||||||
<str name="combineWords">true</str>
|
<str name="combineWords">true</str>
|
||||||
<str name="breakWords">true</str>
|
<str name="breakWords">true</str>
|
||||||
<int name="maxChanges">10</int>
|
<int name="maxChanges">10</int>
|
||||||
|
Loading…
Reference in New Issue
Block a user