Merge branch '3'

# Conflicts:
 #	composer.json
 #	src/Solr/Tasks/Solr_Configure.php
This commit is contained in:
Robbie Averill 2019-03-25 13:24:04 +13:00
commit d6a119ce20
8 changed files with 67 additions and 50 deletions

View File

@ -8,7 +8,7 @@
<!-- The optional sortMissingLast and sortMissingFirst attributes are <!-- The optional sortMissingLast and sortMissingFirst attributes are
currently supported on types that are sorted internally as strings. currently supported on types that are sorted internally as strings.
This includes "string","boolean","sint","slong","sfloat","sdouble","pdate" This includes "string","boolean","sint","slong","sfloat","sdouble","pdate"
- If sortMissingLast="true", then a sort on this field will cause documents - If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field, without the field to come after documents with the field,
regardless of the requested sort order (asc or desc). regardless of the requested sort order (asc or desc).
@ -136,9 +136,11 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.KeywordRepeatFilterFactory"/> <filter class="solr.KeywordRepeatFilterFactory"/>
<filter class="solr.StopFilterFactory" <filter class="solr.StopFilterFactory"
ignoreCase="true" ignoreCase="true"
@ -162,9 +164,11 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.KeywordRepeatFilterFactory"/> <filter class="solr.KeywordRepeatFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@ -210,8 +214,8 @@
<tokenizer class="solr.StandardTokenizerFactory" /> <tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LengthFilterFactory" min="4" max="20" /> <filter class="solr.LengthFilterFactory" min="4" max="20" />
<filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.RemoveDuplicatesTokenFilterFactory" /> <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -239,7 +243,7 @@
<!-- A general unstemmed text field that indexes tokens normally and also <!-- A general unstemmed text field that indexes tokens normally and also
reversed (via ReversedWildcardFilterFactory), to enable more efficient reversed (via ReversedWildcardFilterFactory), to enable more efficient
leading wildcard queries. --> leading wildcard queries. -->
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@ -320,10 +324,10 @@
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
Attributes of the DelimitedPayloadTokenFilterFactory : Attributes of the DelimitedPayloadTokenFilterFactory :
"delimiter" - a one character delimiter. Default is | (pipe) "delimiter" - a one character delimiter. Default is | (pipe)
"encoder" - how to encode the following value into a payload "encoder" - how to encode the following value into a payload
float -> org.apache.lucene.analysis.payloads.FloatEncoder, float -> org.apache.lucene.analysis.payloads.FloatEncoder,
integer -> o.a.l.a.p.IntegerEncoder integer -> o.a.l.a.p.IntegerEncoder
identity -> o.a.l.a.p.IdentityEncoder identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
--> -->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
@ -364,8 +368,8 @@
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. --> <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<!-- <!--
A Geohash is a compact representation of a latitude longitude pair in a single field. A Geohash is a compact representation of a latitude longitude pair in a single field.
See http://wiki.apache.org/solr/SpatialSearch See http://wiki.apache.org/solr/SpatialSearch
--> -->
<fieldtype name="geohash" class="solr.GeoHashField"/> <fieldtype name="geohash" class="solr.GeoHashField"/>

View File

@ -11,7 +11,7 @@ use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
$index = MyIndex::singleton(); $index = MyIndex::singleton();
$query = SearchQuery::create() $query = SearchQuery::create()
->addSearchTerm('My Term'); ->addSearchTerm('My Term');
$params = [ $params = [
'facet' => 'true', 'facet' => 'true',
'facet.field' => 'SiteTree_ClassName', 'facet.field' => 'SiteTree_ClassName',
]; ];
@ -159,7 +159,7 @@ substituted, which will include the original token.
* Two comma-separated lists of words with the symbol "=>" between them. If the token matches any word on * Two comma-separated lists of words with the symbol "=>" between them. If the token matches any word on
the left, then the list on the right is substituted. The original token will not be included unless it is also in the the left, then the list on the right is substituted. The original token will not be included unless it is also in the
list on the right. list on the right.
For example: For example:
@ -207,7 +207,7 @@ $results = $index->search($query, -1, -1, $params);
$results->spellcheck; $results->spellcheck;
``` ```
The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by The built-in `_text` data is better than nothing, but also has some problems: it's heavily processed, for example by
stemming filters which butcher words. So misspelling "Govnernance" will suggest "govern" rather than "Governance". stemming filters which butcher words. So misspelling "Govnernance" will suggest "govern" rather than "Governance".
This can be fixed by aggregating spell checking data in a separate field. This can be fixed by aggregating spell checking data in a separate field.
@ -289,22 +289,22 @@ Each result will automatically contain an `Excerpt` property which you can use i
to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML. to avoid matching HTML attributes, and cluttering highlighted content with unparsed HTML.
## Boosting/Weighting ## Boosting/Weighting
Results aren't all created equal. Matches in some fields are more important than others; for example, a page `Title` might be considered more relevant to the user than terms in the `Content` field. Results aren't all created equal. Matches in some fields are more important than others; for example, a page `Title` might be considered more relevant to the user than terms in the `Content` field.
To account for this, a "weighting" (or "boosting") factor can be applied to each searched field. The default value is `1.0`, anything below that will decrease the relevance, anything above increases it. You can get more information on relevancy at the [Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ). To account for this, a "weighting" (or "boosting") factor can be applied to each searched field. The default value is `1.0`, anything below that will decrease the relevance, anything above increases it. You can get more information on relevancy at the [Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ).
You can manage the boosting in two ways: You can manage the boosting in two ways:
### Boosting on query ### Boosting on query
To adjust the relative values at the time of querying, pass them in as the third argument to your `addSearchTerm()` call: To adjust the relative values at the time of querying, pass them in as the third argument to your `addSearchTerm()` call:
```php ```php
use My\Namespace\Index\MyIndex; use My\Namespace\Index\MyIndex;
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery; use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
use Page; use Page;
$query = SearchQuery::create() $query = SearchQuery::create()
->addSearchTerm( ->addSearchTerm(
'fire', 'fire',
@ -317,9 +317,9 @@ You can manage the boosting in two ways:
); );
$results = MyIndex::singleton()->search($query); $results = MyIndex::singleton()->search($query);
``` ```
This will ensure that `Title` is given higher priority for matches than `Content`, which is well above `SecretParagraph`. This will ensure that `Title` is given higher priority for matches than `Content`, which is well above `SecretParagraph`.
### Boosting on index ### Boosting on index
Boost values for specific fields can also be specified directly on the `SolrIndex` class. Boost values for specific fields can also be specified directly on the `SolrIndex` class.
@ -357,14 +357,14 @@ class SolrSearchIndex extends SolrIndex
## Indexing related objects ## Indexing related objects
To add a related object to your index. To add a related object to your index.
## Subsites ## Subsites
When you are utilising the [subsites module](https://github.com/silverstripe/silverstripe-subsites) you When you are utilising the [subsites module](https://github.com/silverstripe/silverstripe-subsites) you
may want to add [boosting](#boosting/weighting) to results from the current subsite. To do so, you'll may want to add [boosting](#boosting/weighting) to results from the current subsite. To do so, you'll
need to use [eDisMax](https://lucene.apache.org/solr/guide/6_6/the-extended-dismax-query-parser.html) need to use [eDisMax](https://lucene.apache.org/solr/guide/6_6/the-extended-dismax-query-parser.html)
and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex` and the supporting parameters `bq` and `bf`. You should add the following to your `SolrIndex`
extension: extension:
```php ```php
@ -385,7 +385,7 @@ public function search(SearchQuery $query, $offset = -1, $limit = -1, $params =
## Custom field types ## Custom field types
Solr supports custom field type definitions which are written to its XML schema. Many standard ones are already included Solr supports custom field type definitions which are written to its XML schema. Many standard ones are already included
in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template in the default schema. As the XML file is generated dynamically, we can add our own types by overloading the template
responsible for it: `types.ss`. responsible for it: `types.ss`.
In the following example, we read our type definitions from a new file `mysite/solr/templates/types.ss` instead: In the following example, we read our type definitions from a new file `mysite/solr/templates/types.ss` instead:
@ -428,7 +428,7 @@ To allow searches on words containing numeric tokens, you'll need to change the
The `ASCIIFoldingFilterFactory` filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 128 ASCII characters) to their ASCII equivalents, if one exists. The `ASCIIFoldingFilterFactory` filter converts alphabetic, numeric, and symbolic Unicode characters which are not in the Basic Latin Unicode block (the first 128 ASCII characters) to their ASCII equivalents, if one exists.
Find the fields in your overloaded `types.ss` that you want to enable this behaviour in, for example inside the `<fieldType name="htmltext">` block, add the following to both its index analyzer and query analyzer records. By default, this functionality is enabled on the `htmltext` and `text` fieldTypes. If you want it enabled for any other fieldTypes simply find the fields in your overloaded `types.ss` that you want to enable this behaviour in, for example inside the `<fieldType name="textTight">` block, add the following to both its index analyzer and query analyzer records.
```xml ```xml
<filter class="solr.ASCIIFoldingFilterFactory"/> <filter class="solr.ASCIIFoldingFilterFactory"/>
@ -436,7 +436,7 @@ Find the fields in your overloaded `types.ss` that you want to enable this behav
## Text extraction ## Text extraction
Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through Solr provides built-in text extraction capabilities for PDF and Office documents, and numerous other formats, through
the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler)). the `ExtractingRequestHandler` API (see [the Solr wiki entry](http://wiki.apache.org/solr/ExtractingRequestHandler)).
If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the If you're using a default Solr installation, it's most likely already bundled and set up. But if you plan on running the
Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following Solr server integrated into this module, you'll need to download the libraries and link them first. Run the following

View File

@ -186,6 +186,11 @@ abstract class SearchVariant
// Construct new array of variants applicable to at least one class in the list // Construct new array of variants applicable to at least one class in the list
$commonVariants = []; $commonVariants = [];
foreach ($classes as $class => $options) { foreach ($classes as $class => $options) {
// BC for numerically indexed list of classes
if (is_numeric($class) && !empty($options['class'])) {
$class = $options['class']; // $options['class'] is assumed to exist throughout the code base
}
// Extract relevant class options // Extract relevant class options
$includeSubclasses = isset($options['include_children']) ? $options['include_children'] : true; $includeSubclasses = isset($options['include_children']) ? $options['include_children'] : true;

View File

@ -9,6 +9,8 @@ use SilverStripe\Core\Manifest\ModuleLoader;
use SilverStripe\FullTextSearch\Search\FullTextSearch; use SilverStripe\FullTextSearch\Search\FullTextSearch;
use SilverStripe\FullTextSearch\Solr\Services\Solr4Service; use SilverStripe\FullTextSearch\Solr\Services\Solr4Service;
use SilverStripe\FullTextSearch\Solr\Services\Solr3Service; use SilverStripe\FullTextSearch\Solr\Services\Solr3Service;
use SilverStripe\FullTextSearch\Solr\Services\SolrService;
use SilverStripe\FullTextSearch\Solr\Services\SolrService_Core;
class Solr class Solr
{ {
@ -118,14 +120,14 @@ class Solr
/** @var SolrService | null - The instance of SolrService for core management */ /** @var SolrService | null - The instance of SolrService for core management */
protected static $service_singleton = null; protected static $service_singleton = null;
/** @var [SolrService_Core] - The instances of SolrService_Core for each core */ /** @var SolrService_Core[] - The instances of SolrService_Core for each core */
protected static $service_core_singletons = array(); protected static $service_core_singletons = array();
/** /**
* Get a SolrService * Get a SolrService
* *
* @param string $core Optional name of index class * @param string $core Optional name of index class
* @return SolrService_Core * @return SolrService|SolrService_Core
*/ */
public static function service($core = null) public static function service($core = null)
{ {
@ -148,9 +150,8 @@ class Solr
} }
return self::$service_core_singletons[$core]; return self::$service_core_singletons[$core];
} else {
return self::$service_singleton;
} }
return self::$service_singleton;
} }
public static function get_indexes() public static function get_indexes()

View File

@ -12,6 +12,7 @@ use SilverStripe\FullTextSearch\Search\Queries\SearchQuery;
use SilverStripe\FullTextSearch\Search\Queries\SearchQuery_Range; use SilverStripe\FullTextSearch\Search\Queries\SearchQuery_Range;
use SilverStripe\FullTextSearch\Search\Variants\SearchVariant; use SilverStripe\FullTextSearch\Search\Variants\SearchVariant;
use SilverStripe\FullTextSearch\Search\SearchIntrospection; use SilverStripe\FullTextSearch\Search\SearchIntrospection;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore;
use SilverStripe\ORM\ArrayList; use SilverStripe\ORM\ArrayList;
use SilverStripe\ORM\DataObject; use SilverStripe\ORM\DataObject;
use SilverStripe\ORM\FieldType\DBField; use SilverStripe\ORM\FieldType\DBField;

View File

@ -3,6 +3,7 @@ namespace SilverStripe\FullTextSearch\Solr\Tasks;
use Monolog\Handler\StreamHandler; use Monolog\Handler\StreamHandler;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use SilverStripe\Control\HTTPRequest;
use SilverStripe\Core\Injector\Injector; use SilverStripe\Core\Injector\Injector;
use SilverStripe\Dev\BuildTask; use SilverStripe\Dev\BuildTask;
use SilverStripe\FullTextSearch\Utils\Logging\SearchLogFactory; use SilverStripe\FullTextSearch\Utils\Logging\SearchLogFactory;
@ -52,7 +53,7 @@ class Solr_BuildTask extends BuildTask
/** /**
* Setup task * Setup task
* *
* @param SS_HTTPReqest $request * @param HTTPRequest $request
*/ */
public function run($request) public function run($request)
{ {

View File

@ -4,10 +4,11 @@ namespace SilverStripe\FullTextSearch\Solr\Tasks;
use Exception; use Exception;
use SilverStripe\Core\ClassInfo; use SilverStripe\Core\ClassInfo;
use SilverStripe\FullTextSearch\Solr\Solr; use SilverStripe\FullTextSearch\Solr\Solr;
use SilverStripe\FullTextSearch\Solr\SolrIndex;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_File; use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_File;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_Post; use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_Post;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_WebDAV; use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore_WebDAV;
use SilverStripe\FullTextSearch\Solr\Stores\SolrConfigStore;
class Solr_Configure extends Solr_BuildTask class Solr_Configure extends Solr_BuildTask
{ {
@ -51,29 +52,30 @@ class Solr_Configure extends Solr_BuildTask
protected function updateIndex($instance, $store) protected function updateIndex($instance, $store)
{ {
$index = $instance->getIndexName(); $index = $instance->getIndexName();
$this->getLogger()->addInfo("Configuring $index."); $this->getLogger()->info("Configuring $index.");
// Upload the config files for this index // Upload the config files for this index
$this->getLogger()->addInfo("Uploading configuration ..."); $this->getLogger()->info("Uploading configuration ...");
$instance->uploadConfig($store); $instance->uploadConfig($store);
// Then tell Solr to use those config files // Then tell Solr to use those config files
$service = Solr::service(); $service = Solr::service();
if ($service->coreIsActive($index)) { if ($service->coreIsActive($index)) {
$this->getLogger()->addInfo("Reloading core ..."); $this->getLogger()->info("Reloading core ...");
$service->coreReload($index); $service->coreReload($index);
} else { } else {
$this->getLogger()->addInfo("Creating core ..."); $this->getLogger()->info("Creating core ...");
$service->coreCreate($index, $store->instanceDir($index)); $service->coreCreate($index, $store->instanceDir($index));
} }
$this->getLogger()->addInfo("Done"); $this->getLogger()->info("Done");
} }
/** /**
* Get config store * Get config store
* *
* @return SolrConfigStore * @return SolrConfigStore
* @throws Exception
*/ */
protected function getSolrConfigStore() protected function getSolrConfigStore()
{ {
@ -86,16 +88,18 @@ class Solr_Configure extends Solr_BuildTask
// Find the IndexStore handler, which will handle uploading config files to Solr // Find the IndexStore handler, which will handle uploading config files to Solr
$mode = $indexstore['mode']; $mode = $indexstore['mode'];
if ($mode == 'file') { if ($mode === 'file') {
return new SolrConfigStore_File($indexstore); return new SolrConfigStore_File($indexstore);
} elseif ($mode == 'webdav') {
return new SolrConfigStore_WebDAV($indexstore);
} elseif ($mode == 'post') {
return new SolrConfigStore_Post($indexstore);
} elseif (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, SolrConfigStore::class)) {
return new $mode($indexstore);
} else {
user_error('Unknown Solr index mode ' . $indexstore['mode'], E_USER_ERROR);
} }
if ($mode === 'webdav') {
return new SolrConfigStore_WebDAV($indexstore);
}
if ($mode === 'post') {
return new SolrConfigStore_Post($indexstore);
}
if (ClassInfo::exists($mode) && ClassInfo::classImplements($mode, SolrConfigStore::class)) {
return new $mode($indexstore);
}
user_error('Unknown Solr index mode ' . $indexstore['mode'], E_USER_ERROR);
} }
} }

View File

@ -3,6 +3,7 @@
namespace SilverStripe\FullTextSearch\Utils\Logging; namespace SilverStripe\FullTextSearch\Utils\Logging;
use Psr\Log; use Psr\Log;
use Symbiote\QueuedJobs\Services\QueuedJob;
interface SearchLogFactory interface SearchLogFactory
{ {
@ -10,7 +11,7 @@ interface SearchLogFactory
* Make a logger for a queuedjob * Make a logger for a queuedjob
* *
* @param QueuedJob $job * @param QueuedJob $job
* @return Log * @return Log\LoggerInterface
*/ */
public function getQueuedJobLogger($job); public function getQueuedJobLogger($job);
@ -19,7 +20,7 @@ interface SearchLogFactory
* *
* @param string $name * @param string $name
* @param bool $verbose * @param bool $verbose
* @return Log * @return Log\LoggerInterface
*/ */
public function getOutputLogger($name, $verbose); public function getOutputLogger($name, $verbose);
} }