Merge pull request #76 from tractorcow/boosting

API Enable boosted fields to be specified on the index
This commit is contained in:
Hamish Friedlander 2015-06-30 15:56:34 +12:00
commit 0e64e94095
5 changed files with 247 additions and 33 deletions

View File

@ -31,6 +31,22 @@ abstract class SolrIndex extends SearchIndex {
protected $extrasPath = null;
protected $templatesPath = null;
/**
 * Boost values registered on this index, keyed by full field name
 * (e.g. Model_Field) with the numeric boost as the value.
 *
 * Populated by fieldData() when a 'boost' option is supplied and by
 * setFieldBoosting(); read by getBoostedFields() and getQueryFields().
 *
 * @var array
 */
protected $boostedFields = array();
/**
 * Name of the default text field, as returned by getDefaultField().
 * Defaults to '_text'.
 *
 * @var string
 * @config
 */
private static $default_field = '_text';
/**
* @return String Absolute path to the folder containing
* templates which are used for generating the schema and field definitions.
@ -79,7 +95,16 @@ abstract class SolrIndex extends SearchIndex {
}
}
function getFieldDefinitions() {
/**
 * Name of the default text field for this index.
 *
 * The value is read from the `default_field` config setting of the index,
 * which is '_text' unless it has been overridden.
 *
 * @return string
 */
public function getDefaultField() {
	$config = $this->config();
	return $config->default_field;
}
public function getFieldDefinitions() {
$xml = array();
$stored = $this->getStoredDefault();
@ -95,7 +120,8 @@ abstract class SolrIndex extends SearchIndex {
// Add the fulltext collation field
$xml[] = "<field name='_text' type='htmltext' indexed='true' stored='$stored' multiValued='true' />" ;
$df = $this->getDefaultField();
$xml[] = "<field name='{$df}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />" ;
// Add the user-specified fields
@ -155,6 +181,93 @@ abstract class SolrIndex extends SearchIndex {
$options = array_merge($extraOptions, array('stored' => 'true'));
$this->addFulltextField($field, $forceType, $options);
}
/**
 * Register a fulltext field and give it a boost in one call.
 *
 * The boost is handed on as the 'boost' entry of the field options; a 'boost'
 * entry already present in $extraOptions is replaced by $boost.
 *
 * @param string $field The field to add
 * @param string $forceType The type to force this field as (required in some cases, when not
 * detectable from metadata)
 * @param array $extraOptions Dependent on search implementation
 * @param float $boost Numeric boosting value (defaults to 2)
 */
public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2) {
	$boostOption = array('boost' => $boost);
	$this->addFulltextField(
		$field,
		$forceType,
		array_merge($extraOptions, $boostOption)
	);
}
/**
 * Build the field data for a field, intercepting the 'boost' option.
 *
 * 'boost' is removed from $extraOptions before the parent implementation is
 * called, so it is not passed on with the other options. A non-null boost is
 * then recorded in $boostedFields for every field name the parent returned.
 *
 * @param string $field The field to describe
 * @param string $forceType The type to force this field as
 * @param array $extraOptions Field options; may contain a 'boost' entry
 * @return array Field data as produced by the parent implementation
 */
public function fieldData($field, $forceType = null, $extraOptions = array()) {
	// Pull the boost aside; unsetting a missing key is a harmless no-op
	$boost = array_key_exists('boost', $extraOptions) ? $extraOptions['boost'] : null;
	unset($extraOptions['boost']);

	$data = parent::fieldData($field, $forceType, $extraOptions);

	// No boost requested (or an explicit null): nothing to record
	if($boost === null) {
		return $data;
	}

	// Apply the same boost to every field generated for this name
	foreach($data as $fieldName => $ignored) {
		$this->boostedFields[$fieldName] = $boost;
	}
	return $data;
}
/**
 * Set, change or clear the default boost of one fulltext field.
 *
 * The stored boosts feed the default 'qf' (Query Fields) value built by
 * getQueryFields(); a 'qf' given explicitly on a query is not overridden.
 *
 * The field must already have been added to the index as a fulltext field.
 *
 * @param string $field Full field key (Model_Field)
 * @param float|null $level Numeric boosting value. Set to null to clear boost
 * @throws InvalidArgumentException If $field is not a fulltext field of this index
 */
public function setFieldBoosting($field, $level) {
	$isFulltextField = isset($this->fulltextFields[$field]);
	if(!$isFulltextField) {
		throw new InvalidArgumentException("No fulltext field $field exists on ".$this->getIndexName());
	}

	if($level !== null) {
		$this->boostedFields[$field] = $level;
		return;
	}

	// A null level removes any boost previously stored for the field
	unset($this->boostedFields[$field]);
}
/**
 * List the boosts currently registered on this index.
 *
 * @return array Map of full field name to boost value
 */
public function getBoostedFields() {
	$boosts = $this->boostedFields;
	return $boosts;
}
/**
 * Work out the default value for the 'qf' (Query Fields) parameter.
 *
 * Each boosted field becomes a "name^boost" entry. When the default field has
 * no boost of its own it is appended unboosted, because listing any query
 * fields would otherwise exclude it.
 *
 * @return array|null List of query fields, or null if not specified
 */
public function getQueryFields() {
	$boosts = $this->boostedFields;

	// Without boosts there is no need to send 'qf' at all
	if(!$boosts) {
		return null;
	}

	// Turn each name => boost pair into a "name^boost" entry
	$queryFields = array_map(
		function($fieldName, $boost) {
			return $fieldName . '^' . $boost;
		},
		array_keys($boosts),
		$boosts
	);

	// Keep the default field searchable alongside the boosted ones
	$df = $this->getDefaultField();
	$defaultIsBoosted = isset($boosts[$df]);
	if($queryFields && !$defaultIsBoosted) {
		$queryFields[] = $df;
	}

	return $queryFields;
}
/**
* Gets the default 'stored' value for fields in this index
@ -235,8 +348,9 @@ abstract class SolrIndex extends SearchIndex {
function getCopyFieldDefinitions() {
$xml = array();
$df = $this->getDefaultField();
foreach ($this->fulltextFields as $name => $field) {
$xml[] = "<copyField source='{$name}' dest='_text' />";
$xml[] = "<copyField source='{$name}' dest='{$df}' />";
}
foreach ($this->copyFields as $source => $fields) {
@ -367,9 +481,10 @@ abstract class SolrIndex extends SearchIndex {
SearchVariant::with(count($query->classes) == 1 ? $query->classes[0]['class'] : null)->call('alterQuery', $query, $this);
$q = array();
$fq = array();
$hlq = array();
$q = array(); // Query
$fq = array(); // Filter query
$qf = array(); // Query fields
$hlq = array(); // Highlight query
// Build the search itself
@ -463,10 +578,24 @@ abstract class SolrIndex extends SearchIndex {
$fq[] = ($missing ? "+{$field}:[* TO *] " : '') . '-('.implode(' ', $excludeq).')';
}
// Prepare query fields unless specified explicitly
if(isset($params['qf'])) {
$qf = $params['qf'];
} else {
$qf = $this->getQueryFields();
}
if(is_array($qf)) {
$qf = implode(' ', $qf);
}
if($qf) {
$params['qf'] = $qf;
}
if(!headers_sent() && !Director::isLive()) {
if ($q) header('X-Query: '.implode(' ', $q));
if ($fq) header('X-Filters: "'.implode('", "', $fq).'"');
if ($qf) header('X-QueryFields: '.$qf);
}
if ($offset == -1) $offset = $query->start;
@ -474,12 +603,12 @@ abstract class SolrIndex extends SearchIndex {
if ($limit == -1) $limit = SearchQuery::$default_page_size;
$params = array_merge($params, array('fq' => implode(' ', $fq)));
$res = $service->search(
$q ? implode(' ', $q) : '*:*',
$offset,
$limit,
$params,
$params,
Apache_Solr_Service::METHOD_POST
);

View File

@ -61,7 +61,7 @@
<uniqueKey>_documentid</uniqueKey>
<defaultSearchField>_text</defaultSearchField>
<defaultSearchField>$DefaultField</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>

View File

@ -224,7 +224,11 @@ These fields are defined in the schema.xml file that gets sent to Solr.
// the request to Solr would be:
// q=(SiteTree_Title:Lorem+OR+SiteTree_Content:Lorem)
### Configuring boosts on fields
### Configuring boosts
There are several ways in which you can configure boosting on search fields or terms.
#### Boosting on search query
Solr has a way of specifying which fields should be boosted as a parameter to `SearchQuery`.
@ -244,6 +248,35 @@ In this example, we enter "Lorem" as the search term, and boost the `Content` fi
More information on [relevancy on the Solr wiki](http://wiki.apache.org/solr/SolrRelevancyFAQ).
#### Boosting on index fields
Boost values for specific fields can also be specified directly on the `SolrIndex` class.
The following methods can be used to set one or more boosted fields:
* `SolrIndex::addBoostedField` Adds a field with a specific boosted value (defaults to 2)
* `SolrIndex::setFieldBoosting` If a field has already been added to an index, the boosting
value can be customised, changed, or reset for a single field.
* `SolrIndex::addFulltextField` A boost can be set for a field using the `$extraOptions` parameter
with the key `boost` assigned to the desired value.
For example:
:::php
class SolrSearchIndex extends SolrIndex {
	public function init() {
		$this->addClass('SiteTree');
		$this->addAllFulltextFields();
		$this->addFilterField('ShowInSearch');
		// Add Title as a fulltext field with a boost of 1.5
		$this->addBoostedField('Title', null, array(), 1.5);
		// Set the boost of a fulltext field that is already on the index
		$this->setFieldBoosting('SiteTree_SearchBoost', 2);
	}
}
### Custom Types
Solr supports custom field type definitions which are written to its XML schema.

View File

@ -1,31 +1,13 @@
<?php
/**
 * Page subclass used as the indexed record in SearchVariantVersionedTest.
 */
class SearchVariantVersionedTest_Item extends SiteTree {

	// TODO: Currently there's a failure if you addClass a non-table class

	/**
	 * Declares a single 'TestText' column of type Varchar.
	 *
	 * @var array
	 */
	private static $db = array('TestText' => 'Varchar');
}
/**
 * Recording index over SearchVariantVersionedTest_Item, filterable on TestText.
 */
class SearchVariantVersionedTest_Index extends SearchIndex_Recording {

	/**
	 * Register the test item class and its TestText filter field.
	 */
	public function init() {
		$itemClass = 'SearchVariantVersionedTest_Item';
		$this->addClass($itemClass);
		$this->addFilterField('TestText');
	}
}
/**
 * Recording index over SearchVariantVersionedTest_Item that excludes the
 * 'Stage' state of the SearchVariantVersioned variant.
 */
class SearchVariantVersionedTest_IndexNoStage extends SearchIndex_Recording {

	/**
	 * Register the test item class, its TestText filter field and the
	 * excluded variant state.
	 */
	public function init() {
		$itemClass = 'SearchVariantVersionedTest_Item';
		$this->addClass($itemClass);
		$this->addFilterField('TestText');

		$excludedStates = array('SearchVariantVersioned' => 'Stage');
		$this->excludeVariantState($excludedStates);
	}
}
class SearchVariantVersionedTest extends SapphireTest {
private static $index = null;
protected $extraDataObjects = array(
'SearchVariantVersionedTest_Item'
);
function setUp() {
parent::setUp();
@ -108,3 +90,25 @@ class SearchVariantVersionedTest extends SapphireTest {
));
}
}
/**
 * Test-only page subclass used as the indexed record in SearchVariantVersionedTest.
 */
class SearchVariantVersionedTest_Item extends SiteTree implements TestOnly {

	// TODO: Currently there's a failure if you addClass a non-table class

	/**
	 * Declares a single 'TestText' column of type Varchar.
	 *
	 * @var array
	 */
	private static $db = array('TestText' => 'Varchar');
}
/**
 * Recording index over SearchVariantVersionedTest_Item, filterable on TestText.
 */
class SearchVariantVersionedTest_Index extends SearchIndex_Recording {

	/**
	 * Register the test item class and its TestText filter field.
	 */
	public function init() {
		$itemClass = 'SearchVariantVersionedTest_Item';
		$this->addClass($itemClass);
		$this->addFilterField('TestText');
	}
}
/**
 * Recording index over SearchVariantVersionedTest_Item that excludes the
 * 'Stage' state of the SearchVariantVersioned variant.
 */
class SearchVariantVersionedTest_IndexNoStage extends SearchIndex_Recording {

	/**
	 * Register the test item class, its TestText filter field and the
	 * excluded variant state.
	 */
	public function init() {
		$itemClass = 'SearchVariantVersionedTest_Item';
		$this->addClass($itemClass);
		$this->addFilterField('TestText');

		$excludedStates = array('SearchVariantVersioned' => 'Stage');
		$this->excludeVariantState($excludedStates);
	}
}

View File

@ -46,7 +46,10 @@ class SolrIndexTest extends SapphireTest {
$this->assertEquals('SearchUpdaterTest_ManyMany', $data['class']);
}
function testBoost() {
/**
* Test boosting on SearchQuery
*/
function testBoostedQuery() {
$serviceMock = $this->getServiceMock();
Phockito::when($serviceMock)->search(anything(), anything(), anything(), anything(), anything())->return($this->getFakeRawSolrResponse());
@ -63,6 +66,31 @@ class SolrIndexTest extends SapphireTest {
Phockito::verify($serviceMock)->search('+(Field1:term^1.5 OR HasOneObject_Field1:term^3)', anything(), anything(), anything(), anything());
}
/**
 * Boosts declared on the index itself must reach Solr through the 'qf'
 * (query fields) parameter of the search call.
 */
public function testBoostedField() {
	$service = $this->getServiceMock();
	Phockito::when($service)
		->search(anything(), anything(), anything(), anything(), anything())
		->return($this->getFakeRawSolrResponse());

	$boostedIndex = new SolrIndexTest_BoostedIndex();
	$boostedIndex->setService($service);

	$searchQuery = new SearchQuery();
	$searchQuery->search('term');
	$boostedIndex->search($searchQuery);

	// The params passed to the service must carry both boosts plus the default field
	$expectedQf = 'SearchUpdaterTest_Container_Field1^1.5 SearchUpdaterTest_Container_Field2^2.1 _text';
	$qfMatcher = new Hamcrest_Array_IsArrayContainingKeyValuePair(
		new Hamcrest_Core_IsEqual('qf'),
		new Hamcrest_Core_IsEqual($expectedQf)
	);
	Phockito::verify($service)
		->search('+term', anything(), anything(), $qfMatcher, anything());
}
function testHighlightQueryOnBoost() {
$serviceMock = $this->getServiceMock();
@ -207,6 +235,9 @@ class SolrIndexTest extends SapphireTest {
);
}
/**
 * Create a Phockito mock of the Solr service class.
 *
 * @return Solr3Service
 */
protected function getServiceMock() {
	$mock = Phockito::mock('Solr3Service');
	return $mock;
}
@ -257,3 +288,20 @@ class SolrIndexTest_FakeIndex2 extends SolrIndex {
$this->addFilterField('ManyManyObjects.Field1');
}
}
/**
 * Index fixture with two boosted fields: Field1 boosted after being added,
 * Field2 added together with its boost.
 */
class SolrIndexTest_BoostedIndex extends SolrIndex {

	/**
	 * Force the 'stored' default to 'false' rather than the isDev-based default.
	 *
	 * @return string
	 */
	protected function getStoredDefault() {
		// Override isDev defaulting to stored
		$stored = 'false';
		return $stored;
	}

	/**
	 * Register the container class, all of its fulltext fields and the boosts.
	 */
	public function init() {
		$this->addClass('SearchUpdaterTest_Container');
		$this->addAllFulltextFields();

		// Boost an already-added field by its full key
		$this->setFieldBoosting('SearchUpdaterTest_Container_Field1', 1.5);

		// Add a field together with its boost
		$noExtraOptions = array();
		$this->addBoostedField('Field2', null, $noExtraOptions, 2.1);
	}
}