NEW Pre-filtering of indexed record through 'list' option in addClass()

This commit is contained in:
Ingo Schommer 2013-06-17 17:02:50 +02:00
parent bb2e5624f3
commit 8316c36911
5 changed files with 73 additions and 17 deletions

View File

@ -180,7 +180,9 @@ abstract class SearchIndex extends ViewableData {
*
* @throws Exception
* @param String $class - The class to include
* @param array $options - TODO: Remove
* @param array $options
* - 'include_children': TODO remove
* - 'list': A {@link DataList} to pre-filter records to be indexed
*/
public function addClass($class, $options = array()) {
if ($this->fulltextFields || $this->filterFields || $this->sortFields) {
@ -191,8 +193,13 @@ abstract class SearchIndex extends ViewableData {
throw new InvalidArgumentException('Can\'t add classes which don\'t have data tables (no $db or $has_one set on the class)');
}
if(isset($options['list']) && !($options['list'] instanceof SS_List)) {
throw new InvalidArgumentException('The "list" option needs to be of type SS_List');
}
$options = array_merge(array(
'include_children' => true
'include_children' => true,
'list' => null
), $options);
$this->classes[$class] = $options;

View File

@ -234,9 +234,15 @@ class SearchUpdater extends Object {
static function process_dirty_indexes($dirty) {
$indexes = FullTextSearch::get_indexes();
$dirtyindexes = array();
$originalState = SearchVariant::current_state();
$listsByClass = array();
foreach($indexes as $index) {
foreach($index->getClasses() as $class => $options) {
$listsByClass[$class] = ($options['list']) ? $options['list'] : DataList::create($class);
}
}
foreach ($dirty as $base => $statefulids) {
if (!$statefulids) continue;
@ -245,8 +251,7 @@ class SearchUpdater extends Object {
$ids = $statefulid['ids'];
SearchVariant::activate_state($state);
$objs = DataObject::get($base, '"'.$base.'"."ID" IN ('.implode(',', array_keys($ids)).')');
$objs = $listsByClass[$class]->filter('ID', array_keys($ids));
if ($objs) foreach ($objs as $obj) {
foreach ($ids[$obj->ID] as $index) {
if (!$indexes[$index]->variantStateExcluded($state)) {

View File

@ -216,13 +216,12 @@ class Solr_Reindex extends BuildTask {
SearchVariant::activate_state($state);
$filter = $includeSubclasses ? "" : '"ClassName" = \''.$class."'";
$singleton = singleton($class);
$query = $singleton->get($class,$filter,null);
$dtaQuery = $query->dataQuery();
$list = ($options['list']) ? $options['list'] : DataList::create($class);
if($options['include_children']) $list = $list->filter('ClassName', $class);
$dtaQuery = $list->dataQuery();
$sqlQuery = $dtaQuery->getFinalisedQuery();
$singleton->extend('augmentSQL',$sqlQuery,$dtaQuery);
$total = $query->count();
singleton($class)->extend('augmentSQL',$sqlQuery,$dtaQuery);
$total = $list->count();
$statevar = json_encode($state);
echo "Class: $class, total: $total";
@ -258,11 +257,10 @@ class Solr_Reindex extends BuildTask {
SearchVariant::activate_state($variantstate);
$includeSubclasses = $options['include_children'];
$filter = $includeSubclasses ? "" : '"ClassName" = \''.$class."'";
$items = DataList::create($class)->where($filter)->limit($this->stat('recordsPerRequest'), $start);
foreach ($items as $item) { $index->add($item); $item->destroy(); }
$list = ($options['list']) ? $options['list'] : DataList::create($class);
if($options['include_children']) $list = $list->filter('ClassName', $class);
$list = $list->limit($this->stat('recordsPerRequest'), $start);
foreach ($list as $item) { $index->add($item); $item->destroy(); }
}
}

View File

@ -125,6 +125,36 @@ As you can only search one index at a time, all searchable classes need to be in
TODO
## Pre-filtering Indexed Records
Sometimes you only want to index a subset of all records of a certain type,
e.g. to avoid unnecessarily large indices, or to simplify queries by excluding
records that should never match any query variation.
Example: Add `File` records, but filter for documents only (by extension)
<?php
class MyIndex extends SolrIndex {
    /**
     * Registers `File` with this index, pre-filtered to files whose name
     * ends in one of the extensions listed under the 'doc' app category.
     */
    function init() {
        $categories = File::config()->get('app_categories');

        // Build one LIKE clause per document extension; each extension is
        // passed through Convert::raw2sql() before being embedded in the SQL.
        $clauses = array();
        foreach($categories['doc'] as $extension) {
            $clauses[] = '"Filename" LIKE \'%.' . Convert::raw2sql($extension) . '\'';
        }

        // Any one matching extension qualifies the file, hence the OR.
        $documents = File::get()->where(implode(' OR ', $clauses));

        $this->addClass('File', array('list' => $documents));
        // ...
    }
}
Please take care when using this method for records with "variant states" such
as the `Versioned` extension. These extensions add their own filters when the query is created,
based on the current context, and those filters need to be unset on the list to avoid unwanted side effects.
## Weighting/Boosting Fields
Results aren't all created equal. Matches in some fields are more important

View File

@ -40,7 +40,8 @@ class SearchUpdaterTest_HasMany extends DataObject {
class SearchUpdaterTest_Index extends SearchIndex_Recording {
function init() {
$this->addClass('SearchUpdaterTest_Container');
$list = SearchUpdaterTest_Container::get()->where('"Field1" IS NULL OR "Field1" <> \'ExcludedFromIndex\'');
$this->addClass('SearchUpdaterTest_Container', array('list' => $list));
$this->addFilterField('Field1');
$this->addFilterField('HasOneObject.Field1');
@ -72,6 +73,21 @@ class SearchUpdaterTest extends SapphireTest {
// TODO: Get updating just field2 to not update item (maybe not possible - variants complicate)
}
/**
 * Flushing dirty indexes must only add records that pass the index's
 * pre-filter list; a record whose Field1 is 'ExcludedFromIndex' must
 * not show up among the added documents.
 */
function testDoesNotIndexItemsExcludedFromIndex() {
    // Field1 is left unset, so this record is expected to be indexed.
    $included = new SearchUpdaterTest_Container();
    $included->write();

    // Field1 carries the value the test expects to be kept out of the index.
    $excluded = new SearchUpdaterTest_Container();
    $excluded->Field1 = 'ExcludedFromIndex';
    $excluded->write();

    SearchUpdater::flush_dirty_indexes();

    // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
    $expected = array(
        array('ID' => $included->ID),
    );
    $this->assertEquals($expected, self::$index->getAdded(array('ID')));
}
function testHasOneHook() {
$hasOne = new SearchUpdaterTest_HasOne();
$hasOne->write();