mirror of
https://github.com/silverstripe/silverstripe-fulltextsearch
synced 2024-10-22 14:05:29 +02:00
Merge pull request #82 from tractorcow/pulls/reindex-task
API QueuedJob support for Solr_Reindex
This commit is contained in:
commit
ea8de7da15
@ -2,3 +2,5 @@
|
||||
|
||||
global $databaseConfig;
|
||||
if (isset($databaseConfig['type'])) SearchUpdater::bind_manipulation_capture();
|
||||
|
||||
Deprecation::notification_version('1.0.0', 'fulltextsearch');
|
||||
|
@ -4,6 +4,10 @@ Name: defaultprocessor
|
||||
Injector:
|
||||
SearchUpdateProcessor:
|
||||
class: SearchUpdateImmediateProcessor
|
||||
SolrReindexHandler:
|
||||
class: SolrReindexImmediateHandler
|
||||
SearchLogFactory:
|
||||
class: 'MonologFactory'
|
||||
---
|
||||
Name: messagequeueprocessor
|
||||
Only:
|
||||
@ -14,6 +18,8 @@ Except:
|
||||
Injector:
|
||||
SearchUpdateProcessor:
|
||||
class: SearchUpdateMessageQueueProcessor
|
||||
SolrReindexHandler:
|
||||
class: SolrReindexMessageHandler
|
||||
---
|
||||
Name: queuedjobprocessor
|
||||
Only:
|
||||
@ -24,3 +30,5 @@ Except:
|
||||
Injector:
|
||||
SearchUpdateProcessor:
|
||||
class: SearchUpdateQueuedJobProcessor
|
||||
SolrReindexHandler:
|
||||
class: SolrReindexQueuedHandler
|
||||
|
@ -39,6 +39,14 @@ abstract class SearchVariant {
|
||||
*/
|
||||
abstract function activateState($state);
|
||||
|
||||
/**
 * Let this variant alter the given search query.
 *
 * Concrete variants implement this to add whatever the variant requires to the query
 * for the index it is being run against.
 *
 * @param SearchQuery $query Query to be altered
 * @param SearchIndex $index Index the query is intended for
 */
abstract public function alterQuery($query, $index);
|
||||
|
||||
/*** OVERRIDES end here*/
|
||||
|
||||
/** Holds a cache of all variants */
|
||||
|
@ -48,7 +48,7 @@ class SearchVariantSiteTreeSubsitesPolyhome extends SearchVariant {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Filter the query on the '_subsite' field so that it accepts either the current
 * subsite ID or documents flagged with SearchQuery::$missing for that field.
 *
 * @param SearchQuery $query Query to be altered
 * @param SearchIndex $index Index the query is intended for (not used by this variant)
 */
public function alterQuery($query, $index) {
	$query->filter('_subsite', array(Subsite::currentSubsiteID(), SearchQuery::$missing));
}
|
||||
|
@ -28,7 +28,7 @@ class SearchVariantVersioned extends SearchVariant {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Filter the query on the '_versionedstage' field so that it accepts either the current
 * versioned stage or documents flagged with SearchQuery::$missing for that field.
 *
 * @param SearchQuery $query Query to be altered
 * @param SearchIndex $index Index the query is intended for (not used by this variant)
 */
public function alterQuery($query, $index) {
	$query->filter('_versionedstage', array(Versioned::current_stage(), SearchQuery::$missing));
}
|
||||
|
@ -1,5 +1,10 @@
|
||||
<?php
|
||||
|
||||
use Monolog\Formatter\LineFormatter;
|
||||
use Monolog\Handler\StreamHandler;
|
||||
use Monolog\Logger;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
class Solr {
|
||||
|
||||
/**
|
||||
@ -145,10 +150,70 @@ class Solr {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Common parent for the Solr build tasks.
 *
 * Disabled as a task in its own right ($enabled is false); subclasses re-enable themselves.
 * Its job is to give every subclass a logger, which is created when run() is called.
 */
class Solr_BuildTask extends BuildTask {

	protected $enabled = false;

	/**
	 * Logger used by this task. Remains null until setLogger() or run() has been called.
	 *
	 * @var LoggerInterface
	 */
	protected $logger = null;

	/**
	 * Get the current logger
	 *
	 * @return LoggerInterface The assigned logger, or null if none has been assigned yet
	 */
	public function getLogger() {
		return $this->logger;
	}

	/**
	 * Assign a new logger
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger(LoggerInterface $logger) {
		$this->logger = $logger;
	}

	/**
	 * Fetch the log factory registered with the Injector as 'SearchLogFactory'
	 *
	 * @return SearchLogFactory
	 */
	protected function getLoggerFactory() {
		return Injector::inst()->get('SearchLogFactory');
	}

	/**
	 * Setup task
	 *
	 * Builds an output logger named after the concrete task class, passing through the
	 * 'verbose' request variable, and assigns it to this task.
	 *
	 * @param SS_HTTPRequest $request
	 */
	public function run($request) {
		$factory = $this->getLoggerFactory();
		$this->setLogger(
			$factory->getOutputLogger(get_class($this), $request->getVar('verbose'))
		);
	}
}
|
||||
|
||||
|
||||
class Solr_Configure extends Solr_BuildTask {
|
||||
|
||||
protected $enabled = true;
|
||||
|
||||
public function run($request) {
|
||||
parent::run($request);
|
||||
|
||||
// Find the IndexStore handler, which will handle uploading config files to Solr
|
||||
$store = $this->getSolrConfigStore();
|
||||
$indexes = Solr::get_indexes();
|
||||
@ -158,7 +223,9 @@ class Solr_Configure extends BuildTask {
|
||||
$this->updateIndex($instance, $store);
|
||||
} catch(Exception $e) {
|
||||
// We got an exception. Warn, but continue to next index.
|
||||
$this->log("Failure: " . $e->getMessage());
|
||||
$this
|
||||
->getLogger()
|
||||
->error("Failure: " . $e->getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -171,26 +238,25 @@ class Solr_Configure extends BuildTask {
|
||||
*/
|
||||
protected function updateIndex($instance, $store) {
	$log = $this->getLogger();
	$index = $instance->getIndexName();
	$log->info("Configuring $index.");

	// Step 1: push this index's config files to the store
	$log->info("Uploading configuration ...");
	$instance->uploadConfig($store);

	// Step 2: make Solr pick the config up - reload a core that is already active,
	// otherwise create it from the store's instance directory
	$service = Solr::service();
	if (!$service->coreIsActive($index)) {
		$log->info("Creating core ...");
		$service->coreCreate($index, $store->instanceDir($index));
	} else {
		$log->info("Reloading core ...");
		$service->coreReload($index);
	}

	$log->info("Done");
}
|
||||
|
||||
|
||||
/**
|
||||
* Get config store
|
||||
*
|
||||
@ -217,19 +283,26 @@ class Solr_Configure extends BuildTask {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected function log($message) {
|
||||
if(Director::is_cli()) {
|
||||
echo $message . "\n";
|
||||
} else {
|
||||
echo Convert::raw2xml($message) . "<br />";
|
||||
}
|
||||
flush();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Task used for both initiating a new reindex, as well as for processing incremental batches
|
||||
* within a reindex.
|
||||
*
|
||||
* When running a complete reindex you can provide any of the following
|
||||
* - class (to limit to a single class)
|
||||
* - verbose (optional)
|
||||
*
|
||||
* When running with a single batch, provide the following querystring arguments:
|
||||
* - start
|
||||
* - index
|
||||
* - class
|
||||
* - variantstate
|
||||
* - verbose (optional)
|
||||
*/
|
||||
class Solr_Reindex extends Solr_BuildTask {
|
||||
|
||||
class Solr_Reindex extends BuildTask {
|
||||
protected $enabled = true;
|
||||
|
||||
/**
|
||||
* Number of records to load and index per request
|
||||
@ -239,117 +312,94 @@ class Solr_Reindex extends BuildTask {
|
||||
*/
|
||||
private static $recordsPerRequest = 200;
|
||||
|
||||
public function run($request) {
|
||||
increase_time_limit_to();
|
||||
$self = get_class($this);
|
||||
$verbose = isset($_GET['verbose']);
|
||||
|
||||
$originalState = SearchVariant::current_state();
|
||||
|
||||
if (isset($_GET['start'])) {
|
||||
$this->runFrom(singleton($_GET['index']), $_GET['class'], $_GET['start'], json_decode($_GET['variantstate'], true));
|
||||
}
|
||||
else {
|
||||
foreach(array('framework','sapphire') as $dirname) {
|
||||
$script = sprintf("%s%s$dirname%scli-script.php", BASE_PATH, DIRECTORY_SEPARATOR, DIRECTORY_SEPARATOR);
|
||||
if(file_exists($script)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
$class = get_class($this);
|
||||
|
||||
foreach (Solr::get_indexes() as $index => $instance) {
|
||||
echo "Rebuilding {$instance->getIndexName()}\n\n";
|
||||
|
||||
$classes = $instance->getClasses();
|
||||
if($request->getVar('class')) {
|
||||
$limitClasses = explode(',', $request->getVar('class'));
|
||||
$classes = array_intersect_key($classes, array_combine($limitClasses, $limitClasses));
|
||||
}
|
||||
|
||||
if($classes) {
|
||||
Solr::service($index)->deleteByQuery('ClassHierarchy:(' . implode(' OR ', array_keys($classes)) . ')');
|
||||
}
|
||||
|
||||
foreach ($classes as $class => $options) {
|
||||
$includeSubclasses = $options['include_children'];
|
||||
|
||||
foreach (SearchVariant::reindex_states($class, $includeSubclasses) as $state) {
|
||||
if ($instance->variantStateExcluded($state)) continue;
|
||||
|
||||
SearchVariant::activate_state($state);
|
||||
|
||||
$filter = $includeSubclasses ? "" : '"ClassName" = \''.$class."'";
|
||||
$singleton = singleton($class);
|
||||
$query = $singleton->get($class,$filter,null);
|
||||
$dtaQuery = $query->dataQuery();
|
||||
$sqlQuery = $dtaQuery->getFinalisedQuery();
|
||||
$singleton->extend('augmentSQL',$sqlQuery,$dtaQuery);
|
||||
$total = $query->count();
|
||||
|
||||
$statevar = json_encode($state);
|
||||
echo "Class: $class, total: $total";
|
||||
echo ($statevar) ? " in state $statevar\n" : "\n";
|
||||
|
||||
if (strpos(PHP_OS, "WIN") !== false) $statevar = '"'.str_replace('"', '\\"', $statevar).'"';
|
||||
else $statevar = "'".$statevar."'";
|
||||
|
||||
for ($offset = 0; $offset < $total; $offset += $this->stat('recordsPerRequest')) {
|
||||
echo "$offset..";
|
||||
|
||||
$cmd = "php $script dev/tasks/$self index=$index class=$class start=$offset variantstate=$statevar";
|
||||
|
||||
if($verbose) {
|
||||
echo "\n Running '$cmd'\n";
|
||||
$cmd .= " verbose=1 2>&1";
|
||||
}
|
||||
|
||||
$res = $verbose ? passthru($cmd) : `$cmd`;
|
||||
if($verbose) echo " ".preg_replace('/\r\n|\n/', '$0 ', $res)."\n";
|
||||
|
||||
// If we're in dev mode, commit more often for fun and profit
|
||||
if (Director::isDev()) Solr::service($index)->commit();
|
||||
|
||||
// This will slow down things a tiny bit, but it is done so that we don't timeout to the database during a reindex
|
||||
DB::query('SELECT 1');
|
||||
}
|
||||
|
||||
echo "\n";
|
||||
}
|
||||
}
|
||||
|
||||
Solr::service($index)->commit();
|
||||
}
|
||||
}
|
||||
|
||||
$originalState = SearchVariant::current_state();
|
||||
/**
 * Resolve the handler that carries out the reindex.
 *
 * The handler is looked up from the Injector under the 'SolrReindexHandler' service name.
 *
 * @return SolrReindexHandler
 */
protected function getHandler() {
	$injector = Injector::inst();
	return $injector->get('SolrReindexHandler');
}
|
||||
|
||||
protected function runFrom($index, $class, $start, $variantstate) {
|
||||
$classes = $index->getClasses();
|
||||
$options = $classes[$class];
|
||||
$verbose = isset($_GET['verbose']);
|
||||
/**
 * Run the reindex for this request, then put back the variant state that was active on entry.
 *
 * The state is restored even when the reindex fails, so a failed run does not leave
 * another variant state active for whatever code runs next.
 *
 * @param SS_HTTPRequest $request
 * @throws Exception Whatever doReindex() raised, re-thrown after the state has been restored
 */
public function run($request) {
	parent::run($request);

	// Remember the state on entry; runFrom() / the handler's runGroup() activate other states
	$originalState = SearchVariant::current_state();
	try {
		$this->doReindex($request);
	} catch (Exception $e) {
		// catch-and-rethrow rather than `finally` (PHP 5.5+), since the minimum PHP
		// version of this module is not visible from here
		SearchVariant::activate_state($originalState);
		throw $e;
	}
	SearchVariant::activate_state($originalState);
}
|
||||
|
||||
/**
 * Decide from the request variables which kind of reindex run this is, and perform it.
 *
 * - 'start' present: legacy single batch, handled by runFrom()
 * - 'groups' present: one group of a grouped reindex, handled by the handler's runGroup()
 * - otherwise: a top level reindex, delegated to the handler's triggerReindex()
 *
 * @param SS_HTTPRequest $request
 */
protected function doReindex($request) {
	$class = $request->getVar('class');
	$start = $request->getVar('start');

	// Deprecated reindex mechanism: run a single offset based batch directly
	if ($start !== null) {
		$batchIndex = singleton($request->getVar('index'));
		$batchState = json_decode($request->getVar('variantstate'), true);
		$this->runFrom($batchIndex, $class, $start, $batchState);
		return;
	}

	// Without queuedjobs each group is run by invoking this task as a separate process;
	// with queuedjobs each group is processed via a SolrReindexGroupJob
	$groups = $request->getVar('groups');
	$handler = $this->getHandler();

	if (!$groups) {
		// Run at the top level: hand the whole reindex over to the configured handler
		$handler->triggerReindex(
			$this->getLogger(),
			$this->config()->recordsPerRequest,
			get_class($this),
			$class
		);
		return;
	}

	// Run one grouped batch (id % groups = group)
	$group = $request->getVar('group');
	$groupIndex = singleton($request->getVar('index'));
	$groupState = json_decode($request->getVar('variantstate'), true);
	$handler->runGroup($this->getLogger(), $groupIndex, $groupState, $class, $groups, $group);
}
|
||||
|
||||
/**
 * Index one batch of records of a class, selected by start offset (legacy mechanism).
 *
 * @deprecated since version 2.0.0
 *
 * @param SolrIndex $index Index instance; its getClasses() options decide whether subclasses are included
 * @param string $class Class to index
 * @param int $start Offset of the first record of the batch
 * @param array $variantstate Variant state to activate before loading the records
 */
protected function runFrom($index, $class, $start, $variantstate) {
	// Report through the Deprecation API (the same class _config.php configures via
	// Deprecation::notification_version) rather than DeprecationTest_Deprecation
	Deprecation::notice('2.0.0', 'Solr_Reindex now uses a new grouping mechanism');

	// Set time limit and state
	increase_time_limit_to();
	SearchVariant::activate_state($variantstate);

	// Generate filtered list
	$items = DataList::create($class)
		->limit($this->config()->recordsPerRequest, $start);

	// Add child filter
	$classes = $index->getClasses();
	$options = $classes[$class];
	if(!$options['include_children']) {
		$items = $items->filter('ClassName', $class);
	}

	// Process selected records in this class
	$this->getLogger()->info("Adding $class");
	foreach ($items->sort("ID") as $item) {
		$this->getLogger()->debug($item->ID);

		// See SearchUpdater_ObjectHandler::triggerReindex
		$item->triggerReindex();
		$item->destroy();
	}

	$this->getLogger()->info("Done");
}
|
||||
|
||||
}
|
||||
|
@ -511,6 +511,39 @@ abstract class SolrIndex extends SearchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Delete every record from this index that is not covered by the given class whitelist.
 *
 * Useful for trimming an index after it has been reduced to a narrower set of classes.
 *
 * Ignores current state / variant.
 *
 * @param array $classes List of non-obsolete classes in the same format as SolrIndex::getClasses()
 * (class name => options, where options has an 'include_children' entry)
 * @return bool False if the whitelist was empty and nothing was attempted, true once the delete was issued
 */
public function clearObsoleteClasses($classes) {
	// An empty whitelist is treated as "nothing to do" rather than "delete everything"
	if(empty($classes)) return false;

	// One clause per whitelisted class: match on the hierarchy when children are
	// included, otherwise on the exact class name only
	$keep = array();
	foreach ($classes as $className => $classOptions) {
		$field = $classOptions['include_children'] ? 'ClassHierarchy' : 'ClassName';
		$keep[] = "{$field}:{$className}";
	}

	// Negate the whole group: delete whatever matches none of the clauses
	$service = $this->getService();
	$service->deleteByQuery("-(" . implode(' ', $keep) . ")");
	return true;
}
|
||||
|
||||
function commit() {
|
||||
try {
|
||||
$this->getService()->commit(false, false, false);
|
||||
@ -543,32 +576,8 @@ abstract class SolrIndex extends SearchIndex {
|
||||
$hlq = array(); // Highlight query
|
||||
|
||||
// Build the search itself
|
||||
$q = $this->getQueryComponent($query, $hlq);
|
||||
|
||||
foreach ($query->search as $search) {
|
||||
$text = $search['text'];
|
||||
preg_match_all('/"[^"]*"|\S+/', $text, $parts);
|
||||
|
||||
$fuzzy = $search['fuzzy'] ? '~' : '';
|
||||
|
||||
foreach ($parts[0] as $part) {
|
||||
$fields = (isset($search['fields'])) ? $search['fields'] : array();
|
||||
if(isset($search['boost'])) {
|
||||
$fields = array_merge($fields, array_keys($search['boost']));
|
||||
}
|
||||
if ($fields) {
|
||||
$searchq = array();
|
||||
foreach ($fields as $field) {
|
||||
$boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';
|
||||
$searchq[] = "{$field}:".$part.$fuzzy.$boost;
|
||||
}
|
||||
$q[] = '+('.implode(' OR ', $searchq).')';
|
||||
}
|
||||
else {
|
||||
$q[] = '+'.$part.$fuzzy;
|
||||
}
|
||||
$hlq[] = $part;
|
||||
}
|
||||
}
|
||||
// If using boosting, set the clean term separately for highlighting.
|
||||
// See https://issues.apache.org/jira/browse/SOLR-2632
|
||||
if(array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
|
||||
@ -576,64 +585,17 @@ abstract class SolrIndex extends SearchIndex {
|
||||
}
|
||||
|
||||
// Filter by class if requested
|
||||
|
||||
$classq = array();
|
||||
|
||||
foreach ($query->classes as $class) {
|
||||
if (!empty($class['includeSubclasses'])) $classq[] = 'ClassHierarchy:'.$class['class'];
|
||||
if (!empty($class['includeSubclasses'])) {
|
||||
$classq[] = 'ClassHierarchy:'.$class['class'];
|
||||
}
|
||||
else $classq[] = 'ClassName:'.$class['class'];
|
||||
}
|
||||
|
||||
if ($classq) $fq[] = '+('.implode(' ', $classq).')';
|
||||
|
||||
|
||||
// Filter by filters
|
||||
|
||||
foreach ($query->require as $field => $values) {
|
||||
$requireq = array();
|
||||
|
||||
foreach ($values as $value) {
|
||||
if ($value === SearchQuery::$missing) {
|
||||
$requireq[] = "(*:* -{$field}:[* TO *])";
|
||||
}
|
||||
else if ($value === SearchQuery::$present) {
|
||||
$requireq[] = "{$field}:[* TO *]";
|
||||
}
|
||||
else if ($value instanceof SearchQuery_Range) {
|
||||
$start = $value->start; if ($start === null) $start = '*';
|
||||
$end = $value->end; if ($end === null) $end = '*';
|
||||
$requireq[] = "$field:[$start TO $end]";
|
||||
}
|
||||
else {
|
||||
$requireq[] = $field.':"'.$value.'"';
|
||||
}
|
||||
}
|
||||
|
||||
$fq[] = '+('.implode(' ', $requireq).')';
|
||||
}
|
||||
|
||||
foreach ($query->exclude as $field => $values) {
|
||||
$excludeq = array();
|
||||
$missing = false;
|
||||
|
||||
foreach ($values as $value) {
|
||||
if ($value === SearchQuery::$missing) {
|
||||
$missing = true;
|
||||
}
|
||||
else if ($value === SearchQuery::$present) {
|
||||
$excludeq[] = "{$field}:[* TO *]";
|
||||
}
|
||||
else if ($value instanceof SearchQuery_Range) {
|
||||
$start = $value->start; if ($start === null) $start = '*';
|
||||
$end = $value->end; if ($end === null) $end = '*';
|
||||
$excludeq[] = "$field:[$start TO $end]";
|
||||
}
|
||||
else {
|
||||
$excludeq[] = $field.':"'.$value.'"';
|
||||
}
|
||||
}
|
||||
|
||||
$fq[] = ($missing ? "+{$field}:[* TO *] " : '') . '-('.implode(' ', $excludeq).')';
|
||||
}
|
||||
$fq = array_merge($fq, $this->getFiltersComponent($query));
|
||||
|
||||
// Prepare query fields unless specified explicitly
|
||||
if(isset($params['qf'])) {
|
||||
@ -739,6 +701,136 @@ abstract class SolrIndex extends SearchIndex {
|
||||
return new ArrayData($ret);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Build the list of clauses making up the query (q) component for this search
 *
 * Each search text is split into quoted phrases and whitespace separated words; every
 * resulting part becomes one required ("+") clause, optionally spread over fields.
 *
 * @param SearchQuery $searchQuery
 * @param array &$hlq Highlight query returned by reference; every part is appended to it
 * @return array
 */
protected function getQueryComponent(SearchQuery $searchQuery, &$hlq = array()) {
	$clauses = array();

	foreach ($searchQuery->search as $search) {
		// Quoted phrases stay together, everything else is split on whitespace
		preg_match_all('/"[^"]*"|\S+/', $search['text'], $matches);

		$suffix = '';
		if ($search['fuzzy']) {
			$suffix = '~';
		}

		foreach ($matches[0] as $term) {
			// Fields to search: the explicit fields plus any field that carries a boost
			$fields = array();
			if (isset($search['fields'])) {
				$fields = $search['fields'];
			}
			if (isset($search['boost'])) {
				$fields = array_merge($fields, array_keys($search['boost']));
			}

			if (!$fields) {
				// No field restriction: plain required term
				$clauses[] = '+'.$term.$suffix;
			}
			else {
				$perField = array();
				foreach ($fields as $field) {
					$weight = '';
					if (isset($search['boost'][$field])) {
						$weight = '^' . $search['boost'][$field];
					}
					$perField[] = "{$field}:".$term.$suffix.$weight;
				}
				$clauses[] = '+('.implode(' OR ', $perField).')';
			}

			$hlq[] = $term;
		}
	}

	return $clauses;
}
|
||||
|
||||
/**
 * Turn every "require" constraint of the query into a filter query string
 *
 * Produces one required ("+") group per field; the accepted values of that field are
 * joined with spaces inside the group.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each require
 */
protected function getRequireFiltersComponent(SearchQuery $searchQuery) {
	$filters = array();

	foreach ($searchQuery->require as $field => $values) {
		$accepted = array();

		foreach ($values as $value) {
			// Field must be absent
			if ($value === SearchQuery::$missing) {
				$accepted[] = "(*:* -{$field}:[* TO *])";
				continue;
			}

			// Field must hold any value
			if ($value === SearchQuery::$present) {
				$accepted[] = "{$field}:[* TO *]";
				continue;
			}

			// Range, where a null boundary means open ended
			if ($value instanceof SearchQuery_Range) {
				$from = $value->start;
				$to = $value->end;
				$from = ($from === null) ? '*' : $from;
				$to = ($to === null) ? '*' : $to;
				$accepted[] = "$field:[$from TO $to]";
				continue;
			}

			// Anything else is matched as a quoted literal
			$accepted[] = $field.':"'.$value.'"';
		}

		$filters[] = '+('.implode(' ', $accepted).')';
	}

	return $filters;
}
|
||||
|
||||
/**
 * Parse all exclude constraints for inclusion in a filter query
 *
 * Produces at most one entry per field. Excluding SearchQuery::$missing is expressed as a
 * required "field has a value" clause; all other excluded values go into a negated group.
 * The negated group is only emitted when it has content, so excluding nothing but
 * SearchQuery::$missing (or passing an empty value list) never yields an empty "-()" group.
 *
 * @param SearchQuery $searchQuery
 * @return array List of parsed string values for each exclusion
 */
protected function getExcludeFiltersComponent(SearchQuery $searchQuery) {
	$fq = array();
	foreach ($searchQuery->exclude as $field => $values) {
		$excludeq = array();
		$missing = false;

		foreach ($values as $value) {
			if ($value === SearchQuery::$missing) {
				// Handled below as a positive "must be present" clause
				$missing = true;
			}
			else if ($value === SearchQuery::$present) {
				$excludeq[] = "{$field}:[* TO *]";
			}
			else if ($value instanceof SearchQuery_Range) {
				// A null boundary means the range is open ended on that side
				$start = $value->start;
				if ($start === null) {
					$start = '*';
				}
				$end = $value->end;
				if ($end === null) {
					$end = '*';
				}
				$excludeq[] = "$field:[$start TO $end]";
			}
			else {
				$excludeq[] = $field.':"'.$value.'"';
			}
		}

		// Assemble only the parts that have content
		$parts = array();
		if ($missing) {
			$parts[] = "+{$field}:[* TO *]";
		}
		if ($excludeq) {
			$parts[] = '-('.implode(' ', $excludeq).')';
		}
		if ($parts) {
			$fq[] = implode(' ', $parts);
		}
	}
	return $fq;
}
|
||||
|
||||
/**
 * Collect every filter condition for this search: the require filters first,
 * followed by the exclude filters.
 *
 * @param SearchQuery $searchQuery
 * @return array
 */
public function getFiltersComponent(SearchQuery $searchQuery) {
	$required = $this->getRequireFiltersComponent($searchQuery);
	$excluded = $this->getExcludeFiltersComponent($searchQuery);
	return array_merge($required, $excluded);
}
|
||||
|
||||
protected $service;
|
||||
|
||||
/**
|
||||
|
230
code/solr/reindex/handlers/SolrReindexBase.php
Normal file
230
code/solr/reindex/handlers/SolrReindexBase.php
Normal file
@ -0,0 +1,230 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Base class for re-indexing of solr content
 *
 * Walks every index, class and variant state, and splits the records of each combination
 * into groups (record ID modulo the number of groups). How a group gets dispatched is left
 * to subclasses through processGroup(); runGroup() performs the indexing of one group.
 */
abstract class SolrReindexBase implements SolrReindexHandler {

	/**
	 * Reindex every index returned by Solr::get_indexes()
	 *
	 * @param LoggerInterface $logger
	 * @param int $batchSize Records to run each process
	 * @param string $taskName Name of devtask to run
	 * @param string|array|null $classes Optional class or classes to limit index to
	 */
	public function runReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null) {
		foreach (Solr::get_indexes() as $indexInstance) {
			$this->processIndex($logger, $indexInstance, $batchSize, $taskName, $classes);
		}
	}

	/**
	 * Process index for a single SolrIndex instance
	 *
	 * @param LoggerInterface $logger
	 * @param SolrIndex $indexInstance
	 * @param int $batchSize
	 * @param string $taskName
	 * @param string|array|null $classes Optional class or classes to limit index to
	 */
	protected function processIndex(
		LoggerInterface $logger, SolrIndex $indexInstance, $batchSize, $taskName, $classes = null
	) {
		// Filter classes for this index
		$indexClasses = $this->getClassesForIndex($indexInstance, $classes);

		// Clear all records in this index which do not contain the given classes
		$logger->info("Clearing obsolete classes from ".$indexInstance->getIndexName());
		$indexInstance->clearObsoleteClasses($indexClasses);

		// Build queue for each class
		foreach ($indexClasses as $class => $options) {
			$includeSubclasses = $options['include_children'];

			foreach (SearchVariant::reindex_states($class, $includeSubclasses) as $state) {
				$this->processVariant($logger, $indexInstance, $state, $class, $includeSubclasses, $batchSize, $taskName);
			}
		}
	}

	/**
	 * Get valid classes and options for an index with an optional filter
	 *
	 * @param SolrIndex $index
	 * @param string|array $filterClasses Optional class or classes to limit to. A string is
	 * treated as a comma separated list; whitespace around names and empty entries are ignored.
	 * @return array List of classes, where the key is the classname and value is list of options
	 */
	protected function getClassesForIndex(SolrIndex $index, $filterClasses = null) {
		// Get base classes
		$classes = $index->getClasses();
		if(!$filterClasses) {
			return $classes;
		}

		// Normalise the filter to a list of clean class names
		if(!is_array($filterClasses)) {
			$filterClasses = explode(',', $filterClasses);
		}
		$filterClasses = array_filter(array_map('trim', $filterClasses), 'strlen');

		// Keep only the classes of this index that the filter names
		return array_intersect_key($classes, array_fill_keys($filterClasses, true));
	}

	/**
	 * Process re-index for a given variant state and class
	 *
	 * If the state is excluded from the index, or there are no records of the class in this
	 * state, the class's records in this state are cleared from the index and no groups are
	 * processed. Otherwise the records are split into groups of roughly $batchSize and each
	 * group is handed to processGroup().
	 *
	 * @param LoggerInterface $logger
	 * @param SolrIndex $indexInstance
	 * @param array $state Variant state
	 * @param string $class
	 * @param bool $includeSubclasses
	 * @param int $batchSize Values below 1 are treated as 1
	 * @param string $taskName
	 */
	protected function processVariant(
		LoggerInterface $logger, SolrIndex $indexInstance, $state,
		$class, $includeSubclasses, $batchSize, $taskName
	) {
		// Set state
		SearchVariant::activate_state($state);

		// Count records; for a state excluded from this index the count is irrelevant,
		// so the query is not run at all
		$total = 0;
		if (!$indexInstance->variantStateExcluded($state)) {
			$query = $class::get();
			if(!$includeSubclasses) {
				$query = $query->filter('ClassName', $class);
			}
			$total = $query->count();
		}

		// Skip this variant if the state is excluded from this index, or if there are no records
		if ($total == 0) {
			// Remove all records in the current state, since there are no groups to process
			$logger->info("Clearing all records of type {$class} in the current state: " . json_encode($state));
			$this->clearRecords($indexInstance, $class);
			return;
		}

		// For each group, run processing. Guard the batch size so a zero or negative
		// value can neither divide by zero nor produce zero groups
		$batchSize = max(1, (int)$batchSize);
		$groups = (int)(($total + $batchSize - 1) / $batchSize);
		for ($group = 0; $group < $groups; $group++) {
			$this->processGroup($logger, $indexInstance, $state, $class, $groups, $group, $taskName);
		}
	}

	/**
	 * Initiate the processing of a single group
	 *
	 * @param LoggerInterface $logger
	 * @param SolrIndex $indexInstance Index instance
	 * @param array $state Variant state
	 * @param string $class Class to index
	 * @param int $groups Total groups
	 * @param int $group Index of group to process
	 * @param string $taskName Name of task script to run
	 */
	abstract protected function processGroup(
		LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group, $taskName
	);

	/**
	 * Explicitly invoke the process that performs the group
	 * processing. Can be run either by a background task or a queuedjob.
	 *
	 * Does not commit changes to the index, so this must be controlled externally.
	 *
	 * @param LoggerInterface $logger
	 * @param SolrIndex $indexInstance
	 * @param array $state
	 * @param string $class
	 * @param int $groups
	 * @param int $group
	 */
	public function runGroup(
		LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group
	) {
		// Set time limit and state
		increase_time_limit_to();
		SearchVariant::activate_state($state);
		$logger->info("Adding $class");

		// Prior to adding these records to solr, delete existing solr records
		$this->clearRecords($indexInstance, $class, $groups, $group);

		// Process selected records in this class
		$items = $this->getRecordsInGroup($indexInstance, $class, $groups, $group);
		$processed = array();
		foreach ($items as $item) {
			$processed[] = $item->ID;

			// By this point, obsolete classes/states have been removed in processVariant
			// and obsolete records have been removed in clearRecords
			$indexInstance->add($item);
			$item->destroy();
		}
		$logger->info("Updated ".implode(',', $processed));

		// This will slow down things a tiny bit, but it is done so that we don't timeout to the database during a reindex
		DB::query('SELECT 1');

		$logger->info("Done");
	}

	/**
	 * Gets the datalist of records in the given group in the current state
	 *
	 * Assumes that the desired variant state is in effect.
	 *
	 * @param SolrIndex $indexInstance
	 * @param string $class
	 * @param int $groups
	 * @param int $group
	 * @return DataList Records of $class whose base table ID modulo $groups equals $group, sorted by ID
	 */
	protected function getRecordsInGroup(SolrIndex $indexInstance, $class, $groups, $group) {
		// Generate filtered list of local records
		$baseClass = ClassInfo::baseDataClass($class);
		$items = DataList::create($class)
			->where(sprintf(
				'"%s"."ID" %% \'%d\' = \'%d\'',
				$baseClass,
				intval($groups),
				intval($group)
			))
			->sort("ID");

		// Add child filter. A class without options on this index is treated like one that
		// does not include children, without reading an undefined array index
		$classes = $indexInstance->getClasses();
		if(empty($classes[$class]['include_children'])) {
			$items = $items->filter('ClassName', $class);
		}

		return $items;
	}

	/**
	 * Clear all records of the given class in the current state ONLY.
	 *
	 * Optionally delete from a given group (where the group is defined as the ID % total groups)
	 *
	 * @param SolrIndex $indexInstance Index instance
	 * @param string $class Class name
	 * @param int $groups Number of groups, if clearing from a striped group
	 * @param int $group Group number, if clearing from a striped group
	 */
	protected function clearRecords(SolrIndex $indexInstance, $class, $groups = null, $group = null) {
		// Clear by classname
		$conditions = array("+(ClassHierarchy:{$class})");

		// If grouping, delete from this group only. Both numbers are forced to integers
		// before being placed in the query, matching getRecordsInGroup()
		if($groups) {
			$groups = intval($groups);
			$group = intval($group);
			$conditions[] = "+_query_:\"{!frange l={$group} u={$group}}mod(ID, {$groups})\"";
		}

		// Also filter by state: let the variants that apply to this class alter an empty
		// query, then reuse whatever filters they added
		$query = new SearchQuery();
		SearchVariant::with($class)
			->call('alterQuery', $query, $indexInstance);
		if($query->isfiltered()) {
			$conditions = array_merge($conditions, $indexInstance->getFiltersComponent($query));
		}

		// Invoke delete on index
		$deleteQuery = implode(' ', $conditions);
		$indexInstance
			->getService()
			->deleteByQuery($deleteQuery);
	}
}
|
42
code/solr/reindex/handlers/SolrReindexHandler.php
Normal file
42
code/solr/reindex/handlers/SolrReindexHandler.php
Normal file
@ -0,0 +1,42 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Contract for anything able to queue or run a solr reindex
 */
interface SolrReindexHandler {

	/**
	 * Trigger a solr-reindex
	 *
	 * @param LoggerInterface $logger Logger to report progress to
	 * @param int $batchSize Records to run each process
	 * @param string $taskName Name of devtask to run
	 * @param string|array|null $classes Optional class or classes to limit index to
	 */
	public function triggerReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null);

	/**
	 * Begin an immediate re-index
	 *
	 * @param LoggerInterface $logger Logger to report progress to
	 * @param int $batchSize Records to run each process
	 * @param string $taskName Name of devtask to run
	 * @param string|array|null $classes Optional class or classes to limit index to
	 */
	public function runReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null);

	/**
	 * Immediately re-index one group of records.
	 *
	 * A group is the list of items where ID mod $groups = $group, in the given $state
	 * and optional $class filter.
	 *
	 * @param LoggerInterface $logger Logger to report progress to
	 * @param SolrIndex $indexInstance Index to add the records to
	 * @param array $state Variant state the records are read in
	 * @param string $class Class to index
	 * @param int $groups Total number of groups
	 * @param int $group Number of the group to process
	 */
	public function runGroup(LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group);
}
|
74
code/solr/reindex/handlers/SolrReindexImmediateHandler.php
Normal file
74
code/solr/reindex/handlers/SolrReindexImmediateHandler.php
Normal file
@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
|
||||
* Invokes an immediate reindex
|
||||
*
|
||||
* Internally batches of records will be invoked via shell tasks in the background
|
||||
*/
|
||||
class SolrReindexImmediateHandler extends SolrReindexBase {

    /**
     * Trigger a reindex; this handler has no queue, so it runs the reindex straight away.
     *
     * @param LoggerInterface $logger
     * @param int $batchSize Records to run each process
     * @param string $taskName Name of devtask to run
     * @param string|array|null $classes Optional class or classes to limit index to
     */
    public function triggerReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null) {
        $this->runReindex($logger, $batchSize, $taskName, $classes);
    }

    /**
     * Process a single index via the parent implementation, then commit it.
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance Index instance
     * @param int $batchSize Records to run each process
     * @param string $taskName Name of devtask to run
     * @param string|array|null $classes Optional class or classes to limit index to
     */
    protected function processIndex(
        LoggerInterface $logger, SolrIndex $indexInstance, $batchSize, $taskName, $classes = null
    ) {
        parent::processIndex($logger, $indexInstance, $batchSize, $taskName, $classes);

        // Immediate processor needs to immediately commit after each index
        $indexInstance->getService()->commit();
    }

    /**
     * Process a single group.
     *
     * Without queuedjobs, it's necessary to shell this out to a background task as this is
     * very memory intensive.
     *
     * The sub-process will then invoke $processor->runGroup() in {@see Solr_Reindex::doReindex}
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance Index instance
     * @param array $state Variant state
     * @param string $class Class to index
     * @param int $groups Total groups
     * @param int $group Index of group to process
     * @param string $taskName Name of task script to run
     */
    protected function processGroup(
        LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group, $taskName
    ) {
        // Build state as a single shell argument
        $statevar = json_encode($state);
        if (strpos(PHP_OS, "WIN") !== false) {
            // Windows: wrap in double quotes and backslash-escape embedded ones
            $statevar = '"'.str_replace('"', '\\"', $statevar).'"';
        } else {
            // Single-quote the value, also escaping any single quote inside the JSON
            $statevar = escapeshellarg($statevar);
        }

        // Build script
        $indexName = $indexInstance->getIndexName();
        $scriptPath = sprintf("%s%sframework%scli-script.php", BASE_PATH, DIRECTORY_SEPARATOR, DIRECTORY_SEPARATOR);
        $scriptTask = "php {$scriptPath} dev/tasks/{$taskName}";
        $cmd = "{$scriptTask} index={$indexName} class={$class} group={$group} groups={$groups} variantstate={$statevar}";
        $cmd .= " verbose=1 2>&1";
        $logger->info("Running '$cmd'");

        // Execute script via shell. passthru() would stream the output past the logger and
        // return nothing to log, so capture it instead and route it through the logger.
        $res = shell_exec($cmd);
        if(is_string($res) && $res !== '') {
            $logger->info(preg_replace('/\r\n|\n/', '$0 ', $res));
        }

        // If we're in dev mode, commit more often for fun and profit
        if (Director::isDev()) {
            Solr::service($indexName)->commit();
        }

        // This will slow down things a tiny bit, but it is done so that we don't timeout to the database during a reindex
        DB::query('SELECT 1');
    }
}
|
40
code/solr/reindex/handlers/SolrReindexMessageHandler.php
Normal file
40
code/solr/reindex/handlers/SolrReindexMessageHandler.php
Normal file
@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
if(!class_exists('MessageQueue')) return;
|
||||
|
||||
class SolrReindexMessageHandler extends SolrReindexImmediateHandler {

    /**
     * The MessageQueue to use when processing updates
     * @config
     * @var string
     */
    private static $reindex_queue = "search_indexing";

    /**
     * Queue a message which, when consumed, calls {@see run_reindex()} with the given arguments.
     *
     * @param LoggerInterface $logger
     * @param int $batchSize Records to run each process
     * @param string $taskName Name of devtask to run
     * @param string|array|null $classes Optional class or classes to limit index to
     */
    public function triggerReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null) {
        $queue = Config::inst()->get(__CLASS__, 'reindex_queue');

        $logger->info('Queuing message');
        MessageQueue::send(
            $queue,
            new MethodInvocationMessage(__CLASS__, 'run_reindex', $batchSize, $taskName, $classes)
        );
    }

    /**
     * Entry point for message queue
     *
     * @param int $batchSize
     * @param string $taskName
     * @param array|string|null $classes
     */
    public static function run_reindex($batchSize, $taskName, $classes = null) {
        // @todo Logger for message queue?
        // __CLASS__ is used rather than argument-less get_class(), which is deprecated in PHP 8.3
        $logger = Injector::inst()->createWithArgs('Monolog\Logger', array(strtolower(__CLASS__)));

        $inst = Injector::inst()->get(__CLASS__);
        $inst->runReindex($logger, $batchSize, $taskName, $classes);
    }
}
|
94
code/solr/reindex/handlers/SolrReindexQueuedHandler.php
Normal file
94
code/solr/reindex/handlers/SolrReindexQueuedHandler.php
Normal file
@ -0,0 +1,94 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
if(!interface_exists('QueuedJob')) return;
|
||||
|
||||
/**
|
||||
* Represents a queued task to start the reindex job
|
||||
*/
|
||||
class SolrReindexQueuedHandler extends SolrReindexBase {

    /**
     * Service used to queue jobs
     *
     * @return QueuedJobService
     */
    protected function getQueuedJobService() {
        return singleton('QueuedJobService');
    }

    /**
     * Mark every not-yet-finished job of the given implementation as cancelled
     *
     * @param string $type Job implementation (class name) to cancel
     * @return int Number of job rows affected
     */
    protected function cancelExistingJobs($type) {
        // Statuses considered cancellable: paused, pending start, or already underway
        $clearable = array(
            QueuedJob::STATUS_PAUSED,
            QueuedJob::STATUS_NEW,
            QueuedJob::STATUS_WAIT,
            QueuedJob::STATUS_INIT,
            QueuedJob::STATUS_RUN
        );
        $statusList = implode("','", Convert::raw2sql($clearable));

        $sql = sprintf(
            'UPDATE "QueuedJobDescriptor" '
                . ' SET "JobStatus" = \'%s\''
                . ' WHERE "JobStatus" IN (\'%s\')'
                . ' AND "Implementation" = \'%s\'',
            Convert::raw2sql(QueuedJob::STATUS_CANCELLED),
            $statusList,
            Convert::raw2sql($type)
        );
        DB::query($sql);

        return DB::affectedRows();
    }

    /**
     * Cancel outstanding reindex jobs, then queue a fresh top-level reindex job
     *
     * @param LoggerInterface $logger
     * @param int $batchSize Records to run each process
     * @param string $taskName Name of devtask to run
     * @param string|array|null $classes Optional class or classes to limit index to
     */
    public function triggerReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null) {
        // Clear out both the top-level jobs and the per-group jobs
        $cancelledTasks = $this->cancelExistingJobs('SolrReindexQueuedJob');
        $cancelledGroups = $this->cancelExistingJobs('SolrReindexGroupQueuedJob');
        $logger->info("Cancelled {$cancelledTasks} re-index tasks and {$cancelledGroups} re-index groups");

        // Build the top-level job through the injector and hand it to the queue
        $job = Injector::inst()->create('SolrReindexQueuedJob', $batchSize, $taskName, $classes);
        $this->getQueuedJobService()->queueJob($job);

        $logger->info('Queued ' . $job->getTitle());
    }

    /**
     * Queue a separate job for the given group rather than processing it in place
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance Index instance
     * @param array $state Variant state
     * @param string $class Class to index
     * @param int $groups Total groups
     * @param int $group Index of group to process
     * @param string $taskName Name of task script (not used by this implementation)
     */
    protected function processGroup(
        LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group, $taskName
    ) {
        $indexName = $indexInstance->getIndexName();
        $job = Injector::inst()->create('SolrReindexGroupQueuedJob', $indexName, $state, $class, $groups, $group);
        $this->getQueuedJobService()->queueJob($job);

        $logger->info('Queued ' . $job->getTitle());
    }

    /**
     * Run the group via the parent implementation, then queue a commit
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance
     * @param array $state
     * @param string $class
     * @param int $groups
     * @param int $group
     */
    public function runGroup(
        LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group
    ) {
        parent::runGroup($logger, $indexInstance, $state, $class, $groups, $group);

        // Commits are queued rather than issued per group
        // see http://stackoverflow.com/questions/7512945/how-to-fix-exceeded-limit-of-maxwarmingsearchers
        $logger->info("Queuing commit on all changes");
        SearchUpdateCommitJobProcessor::queue();
    }

}
|
117
code/solr/reindex/jobs/SolrReindexGroupQueuedJob.php
Normal file
117
code/solr/reindex/jobs/SolrReindexGroupQueuedJob.php
Normal file
@ -0,0 +1,117 @@
|
||||
<?php
|
||||
|
||||
if(!interface_exists('QueuedJob')) return;
|
||||
|
||||
/**
|
||||
* Queuedjob to re-index a small group within an index.
|
||||
*
|
||||
* This job is optimised for efficient full re-indexing of an index via Solr_Reindex.
|
||||
*
|
||||
* Operates similarly to {@see SearchUpdateQueuedJobProcessor} but can not work with an arbitrary
|
||||
* list of IDs. Instead groups are segmented by ID. Additionally, this task does incremental
|
||||
* deletions of records.
|
||||
*/
|
||||
class SolrReindexGroupQueuedJob extends SolrReindexQueuedJobBase {

    /**
     * Name of the index to reindex
     *
     * @var string
     */
    protected $indexName;

    /**
     * Variant state this group belongs to
     *
     * @var array
     */
    protected $state;

    /**
     * The single class name to index
     *
     * @var string
     */
    protected $class;

    /**
     * Total number of groups
     *
     * @var int
     */
    protected $groups;

    /**
     * Index of this group
     *
     * @var int
     */
    protected $group;

    /**
     * @param string $indexName Name of index to reindex
     * @param array $state Variant state
     * @param string $class Class to index
     * @param int $groups Total number of groups
     * @param int $group Index of this group
     */
    public function __construct($indexName = null, $state = null, $class = null, $groups = null, $group = null) {
        parent::__construct();
        $this->indexName = $indexName;
        $this->state = $state;
        $this->class = $class;
        $this->groups = $groups;
        $this->group = $group;
    }

    /**
     * Standard job data from the parent, extended with this job's own fields
     *
     * @return stdClass
     */
    public function getJobData() {
        $data = parent::getJobData();

        // jobData is an object, so writing through this handle updates $data->jobData
        $custom = $data->jobData;
        $custom->indexName = $this->indexName;
        $custom->state = $this->state;
        $custom->class = $this->class;
        $custom->groups = $this->groups;
        $custom->group = $this->group;

        return $data;
    }

    /**
     * Restore this job from stored data
     */
    public function setJobData($totalSteps, $currentStep, $isComplete, $jobData, $messages) {
        parent::setJobData($totalSteps, $currentStep, $isComplete, $jobData, $messages);

        // Restore the custom fields written by getJobData()
        $this->indexName = $jobData->indexName;
        $this->state = $jobData->state;
        $this->class = $jobData->class;
        $this->groups = $jobData->groups;
        $this->group = $jobData->group;
    }

    /**
     * Signature built from the class name, current time and a random number
     *
     * @return string
     */
    public function getSignature() {
        $seed = get_class($this) . time() . mt_rand(0, 100000);
        return md5($seed);
    }

    /**
     * @return string Title, showing the group as a 1-based position
     */
    public function getTitle() {
        $position = $this->group + 1;
        $stateLabel = json_encode($this->state);
        return sprintf('Solr Reindex Group (%d/%d) of %s in %s', $position, $this->groups, $this->class, $stateLabel);
    }

    /**
     * Run this group through the reindex handler, unless the job is already finished
     */
    public function process() {
        $logger = $this->getLogger();
        if($this->jobFinished()) {
            $logger->notice("reindex group already complete");
            return;
        }

        // Resolve the index by name
        $indexInstance = singleton($this->indexName);

        // Delegate the actual work to the handler
        $logger->info("Beginning reindex group");
        $handler = $this->getHandler();
        $handler->runGroup($logger, $indexInstance, $this->state, $this->class, $this->groups, $this->group);
        $logger->info("Completed reindex group");
        $this->isComplete = true;
    }

}
|
91
code/solr/reindex/jobs/SolrReindexQueuedJob.php
Normal file
91
code/solr/reindex/jobs/SolrReindexQueuedJob.php
Normal file
@ -0,0 +1,91 @@
|
||||
<?php
|
||||
|
||||
if(!interface_exists('QueuedJob')) return;
|
||||
|
||||
/**
|
||||
* Represents a queuedjob which invokes a reindex
|
||||
*/
|
||||
class SolrReindexQueuedJob extends SolrReindexQueuedJobBase {

    /**
     * Number of records in each batch
     *
     * @var int
     */
    protected $batchSize;

    /**
     * Name of the dev task which invoked this job.
     * Not necessary for re-index processing performed entirely by queuedjobs
     *
     * @var string
     */
    protected $taskName;

    /**
     * Class or classes to filter by
     *
     * @var array|string
     */
    protected $classes;

    /**
     * @param int $batchSize Size of each batch
     * @param string $taskName Name of the invoking dev task
     * @param array|string|null $classes Optional class filter
     */
    public function __construct($batchSize = null, $taskName = null, $classes = null) {
        parent::__construct();
        $this->batchSize = $batchSize;
        $this->taskName = $taskName;
        $this->classes = $classes;
    }

    /**
     * Standard job data from the parent, extended with this job's own fields
     *
     * @return stdClass
     */
    public function getJobData() {
        $data = parent::getJobData();

        // jobData is an object, so writing through this handle updates $data->jobData
        $custom = $data->jobData;
        $custom->batchSize = $this->batchSize;
        $custom->taskName = $this->taskName;
        $custom->classes = $this->classes;

        return $data;
    }

    /**
     * Restore this job from stored data
     */
    public function setJobData($totalSteps, $currentStep, $isComplete, $jobData, $messages) {
        parent::setJobData($totalSteps, $currentStep, $isComplete, $jobData, $messages);

        // Restore the custom fields written by getJobData()
        $this->batchSize = $jobData->batchSize;
        $this->taskName = $jobData->taskName;
        $this->classes = $jobData->classes;
    }

    /**
     * @return string Fixed signature: the name of this class
     */
    public function getSignature() {
        return __CLASS__;
    }

    /**
     * @return string
     */
    public function getTitle() {
        return 'Solr Reindex Job';
    }

    /**
     * Run the reindex through the handler, unless the job is already finished
     */
    public function process() {
        $logger = $this->getLogger();
        if($this->jobFinished()) {
            $logger->notice("reindex already complete");
            return;
        }

        // Delegate the actual work to the handler
        $logger->info("Beginning init of reindex");
        $handler = $this->getHandler();
        $handler->runReindex($logger, $this->batchSize, $this->taskName, $this->classes);
        $logger->info("Completed init of reindex");
        $this->isComplete = true;
    }

    /**
     * Size of each batch
     *
     * @return int
     */
    public function getBatchSize() {
        return $this->batchSize;
    }
}
|
123
code/solr/reindex/jobs/SolrReindexQueuedJobBase.php
Normal file
123
code/solr/reindex/jobs/SolrReindexQueuedJobBase.php
Normal file
@ -0,0 +1,123 @@
|
||||
<?php
|
||||
|
||||
use Monolog\Logger;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
if(!interface_exists('QueuedJob')) return;
|
||||
|
||||
/**
|
||||
* Base class for jobs which perform re-index
|
||||
*/
|
||||
abstract class SolrReindexQueuedJobBase implements QueuedJob {

    /**
     * Whether this job has finished
     *
     * @var bool
     */
    protected $isComplete;

    /**
     * Messages recorded against this job
     *
     * @var array
     */
    protected $messages;

    /**
     * Logger used by this job; created on demand by {@see getLogger()}
     *
     * @var LoggerInterface
     */
    protected $logger;

    public function __construct() {
        $this->isComplete = false;
        $this->messages = array();
    }

    /**
     * Factory used to build the default logger
     *
     * @return SearchLogFactory
     */
    protected function getLoggerFactory() {
        return Injector::inst()->get('SearchLogFactory');
    }

    /**
     * Logger for this job. One is built from the log factory on first use
     * unless a logger was assigned via {@see setLogger()}.
     *
     * @return LoggerInterface
     */
    protected function getLogger() {
        if(!$this->logger) {
            $factory = $this->getLoggerFactory();
            $this->logger = $factory->getQueuedJobLogger($this);
        }
        return $this->logger;
    }

    /**
     * Assign a custom logger for this job
     *
     * @param LoggerInterface $logger
     */
    public function setLogger($logger) {
        $this->logger = $logger;
    }

    /**
     * Standard job data, plus an empty jobData object for subclasses to fill in
     *
     * @return stdClass
     */
    public function getJobData() {
        $data = new stdClass();

        // Standard fields
        $data->totalSteps = 1;
        // NOTE(review): reports 1 while pending and 0 once complete, which looks inverted
        // relative to totalSteps = 1 - confirm this is intended before changing.
        if($this->isComplete) {
            $data->currentStep = 0;
        } else {
            $data->currentStep = 1;
        }
        $data->isComplete = $this->isComplete;
        $data->messages = $this->messages;

        // Container for subclass-specific data
        $data->jobData = new stdClass();
        return $data;
    }

    /**
     * Restore completion flag and messages; the remaining arguments are not used here
     */
    public function setJobData($totalSteps, $currentStep, $isComplete, $jobData, $messages) {
        $this->messages = $messages;
        $this->isComplete = $isComplete;
    }

    /**
     * Reindex handler registered with the injector
     *
     * @return SolrReindexHandler
     */
    protected function getHandler() {
        return Injector::inst()->get('SolrReindexHandler');
    }

    /**
     * @return bool Whether this job has finished
     */
    public function jobFinished() {
        return $this->isComplete;
    }

    public function prepareForRestart() {
        // Nothing to do
    }

    public function setup() {
        // Nothing to do
    }

    public function afterComplete() {
        // Nothing to do
    }

    /**
     * @return string The QueuedJob::QUEUED job type
     */
    public function getJobType() {
        return QueuedJob::QUEUED;
    }

    /**
     * Append a message to this job's message list
     *
     * @param string $message
     */
    public function addMessage($message) {
        $this->messages[] = $message;
    }
}
|
98
code/utils/logging/MonologFactory.php
Normal file
98
code/utils/logging/MonologFactory.php
Normal file
@ -0,0 +1,98 @@
|
||||
<?php
|
||||
|
||||
use Monolog\Formatter\FormatterInterface;
|
||||
use Monolog\Formatter\LineFormatter;
|
||||
use Monolog\Handler\HandlerInterface;
|
||||
use Monolog\Logger;
|
||||
|
||||
/**
|
||||
* Provides logging based on monolog
|
||||
*/
|
||||
class MonologFactory implements SearchLogFactory {

    /**
     * Build a logger which writes to the output streams
     *
     * @param string $name Logger name
     * @param bool $verbose Whether messages of INFO level and above are also written
     * @return Logger
     */
    public function getOutputLogger($name, $verbose) {
        $logger = $this->getLoggerFor($name);
        $formatter = $this->getFormatter();

        // Informational output is only attached in verbose mode
        if($verbose) {
            $logger->pushHandler(
                $this->getStreamHandler($formatter, 'php://stdout', Logger::INFO)
            );
        }

        // Error handling. bubble is false so that errors aren't logged twice
        $logger->pushHandler(
            $this->getStreamHandler($formatter, 'php://stderr', Logger::ERROR, false)
        );
        return $logger;
    }

    /**
     * Build a logger which writes into the given job
     *
     * @param QueuedJob $job
     * @return Logger
     */
    public function getQueuedJobLogger($job) {
        $logger = $this->getLoggerFor(get_class($job));
        $logger->pushHandler($this->getJobHandler($job));
        return $logger;
    }

    /**
     * Build a stream handler
     *
     * @param FormatterInterface $formatter
     * @param string $stream Name of preferred stream; only honoured on the command line
     * @param int $level
     * @param bool $bubble
     * @return HandlerInterface
     */
    protected function getStreamHandler(FormatterInterface $formatter, $stream, $level = Logger::DEBUG, $bubble = true) {
        // Outside the CLI everything goes to php://output
        if(!Director::is_cli()) {
            $stream = 'php://output';
        }
        $args = array($stream, $level, $bubble);
        $handler = Injector::inst()->createWithArgs('Monolog\Handler\StreamHandler', $args);
        $handler->setFormatter($formatter);
        return $handler;
    }

    /**
     * Build the line formatter; outside the CLI each line is wrapped in a paragraph tag
     *
     * @return FormatterInterface
     */
    protected function getFormatter() {
        $format = Director::is_cli()
            ? LineFormatter::SIMPLE_FORMAT
            : '<p>' . LineFormatter::SIMPLE_FORMAT . '</p>';
        return Injector::inst()->createWithArgs('Monolog\Formatter\LineFormatter', array($format));
    }

    /**
     * Build a logger named after the lower-cased $name
     *
     * @param string $name
     * @return Logger
     */
    protected function getLoggerFor($name) {
        $channel = strtolower($name);
        return Injector::inst()->createWithArgs('Monolog\Logger', array($channel));
    }

    /**
     * Build the handler which records INFO level and above against a job
     *
     * @param QueuedJob $job
     * @return HandlerInterface
     */
    protected function getJobHandler($job) {
        $args = array($job, Logger::INFO);
        return Injector::inst()->createWithArgs('QueuedJobLogHandler', $args);
    }
}
|
53
code/utils/logging/QueuedJobLogHandler.php
Normal file
53
code/utils/logging/QueuedJobLogHandler.php
Normal file
@ -0,0 +1,53 @@
|
||||
<?php
|
||||
|
||||
use Monolog\Handler\AbstractProcessingHandler;
|
||||
use Monolog\Logger;
|
||||
|
||||
if(!interface_exists('QueuedJob')) return;
|
||||
|
||||
/**
|
||||
* Handler for logging events into QueuedJob message data
|
||||
*/
|
||||
class QueuedJobLogHandler extends AbstractProcessingHandler {

    /**
     * Job which receives the log messages
     *
     * @var QueuedJob
     */
    protected $queuedJob;

    /**
     * @param QueuedJob $queuedJob Job to log to
     * @param integer $level The minimum logging level at which this handler will be triggered
     * @param Boolean $bubble Whether the messages that are handled can bubble up the stack or not
     */
    public function __construct(QueuedJob $queuedJob, $level = Logger::DEBUG, $bubble = true) {
        parent::__construct($level, $bubble);
        $this->setQueuedJob($queuedJob);
    }

    /**
     * Replace the job being logged to
     *
     * @param QueuedJob $queuedJob
     */
    public function setQueuedJob(QueuedJob $queuedJob) {
        $this->queuedJob = $queuedJob;
    }

    /**
     * Job being logged to
     *
     * @return QueuedJob
     */
    public function getQueuedJob() {
        return $this->queuedJob;
    }

    /**
     * Add the formatted text of the record to the job as a message
     *
     * @param array $record Log record; its 'formatted' entry is used
     */
    protected function write(array $record) {
        $job = $this->getQueuedJob();
        $job->addMessage($record['formatted']);
    }

}
|
23
code/utils/logging/SearchLogFactory.php
Normal file
23
code/utils/logging/SearchLogFactory.php
Normal file
@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
use Psr\Log;
|
||||
|
||||
interface SearchLogFactory {

    /**
     * Make a logger for a queuedjob
     *
     * @param QueuedJob $job
     * @return \Psr\Log\LoggerInterface
     */
    public function getQueuedJobLogger($job);

    /**
     * Get an output logger with the given verbosity
     *
     * @param string $name
     * @param bool $verbose
     * @return \Psr\Log\LoggerInterface
     */
    public function getOutputLogger($name, $verbose);
}
|
@ -10,7 +10,8 @@
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"silverstripe/framework": "~3.1"
|
||||
"silverstripe/framework": "~3.1",
|
||||
"monolog/monolog": "~1.15"
|
||||
},
|
||||
"require-dev": {
|
||||
"silverstripe/cms": "~3.1",
|
||||
|
@ -94,13 +94,22 @@ based on ORM manipulations of the underlying data.
|
||||
For example, calling `$myPage->write()` will automatically
|
||||
update the index entry for this record (and all its variants).
|
||||
|
||||
You can narrow down the operation with the following options:
|
||||
This task has the following options:
|
||||
|
||||
- `verbose`: Debug information
|
||||
|
||||
Internally, depending on what job processing backend you have configured (such as queuedjobs)
|
||||
individual tasks for re-indexing groups of records may either be performed behind the scenes
|
||||
as crontasks, or via separate processes initiated by the current request.
|
||||
|
||||
Internally, records are processed in groups of 200. You can configure this
|
||||
group sizing by using the `Solr_Reindex.recordsPerRequest` config.
|
||||
|
||||
|
||||
:::yaml
|
||||
Solr_Reindex:
|
||||
recordsPerRequest: 150
|
||||
|
||||
- `index`: PHP class name of an index
|
||||
- `class`: PHP model class to reindex
|
||||
- `start`: Offset (applies to matched records)
|
||||
- `variantstate`: JSON encoded string with state, e.g. '{"SearchVariantVersioned":"Stage"}'
|
||||
- `verbose`: Debug information
|
||||
|
||||
Note: The Solr indexes will be stored as binary files inside your SilverStripe project.
|
||||
You can also copy the `thirdparty/` solr directory somewhere else,
|
||||
|
219
tests/SolrReindexQueuedTest.php
Normal file
219
tests/SolrReindexQueuedTest.php
Normal file
@ -0,0 +1,219 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Additional tests of solr reindexing processes when run with queuedjobs
|
||||
*/
|
||||
class SolrReindexQueuedTest extends SapphireTest {

    protected $usesDatabase = true;

    protected $extraDataObjects = array(
        'SolrReindexTest_Item'
    );

    /**
     * Index forced for the duration of each test
     *
     * @var SolrReindexTest_Index
     */
    protected $index = null;

    /**
     * Mocked solr service
     *
     * @var SolrService
     */
    protected $service = null;

    public function setUp() {
        parent::setUp();

        // Both Phockito and queuedjobs are required by these tests
        if (!class_exists('Phockito')) {
            $this->skipTest = true;
            return $this->markTestSkipped("These tests need the Phockito module installed to run");
        }
        if(!interface_exists('QueuedJob')) {
            $this->skipTest = true;
            return $this->markTestSkipped("These tests need the QueuedJobs module installed to run");
        }

        // Use the queued handler for reindexing
        $handlerSpec = array('class' => 'SolrReindexQueuedHandler');
        Config::inst()->update('Injector', 'SolrReindexHandler', $handlerSpec);
        Injector::inst()->registerService(new SolrReindexQueuedHandler(), 'SolrReindexHandler');

        // Enable the test variant
        SolrReindexTest_Variant::enable();

        // Force the test index, backed by the mocked service
        $this->service = $this->getServiceMock();
        $this->index = singleton('SolrReindexTest_Index');
        $this->index->setService($this->service);
        FullTextSearch::force_index_list($this->index);
    }

    /**
     * Fill the database with a dummy dataset
     *
     * @param int $number Number of records to create in each variant
     */
    protected function createDummyData($number) {
        // Truncating first gives predictable IDs
        DB::query('TRUNCATE "SolrReindexTest_Item"');

        // Variant 2 is deliberately left without records, to represent a variant
        // that should be cleared without any re-indexes performed
        foreach(array(0, 1) as $variant) {
            $counter = 1;
            while($counter <= $number) {
                $record = new SolrReindexTest_Item();
                $record->Variant = $variant;
                $record->Title = "Item $variant / $counter";
                $record->write();
                $counter++;
            }
        }
    }

    /**
     * Build the mocked solr service
     *
     * @return SolrService
     */
    protected function getServiceMock() {
        return Phockito::mock('Solr4Service');
    }

    public function tearDown() {
        FullTextSearch::force_index_list();
        SolrReindexTest_Variant::disable();
        parent::tearDown();
    }

    /**
     * Reindex handler registered with the injector
     *
     * @return SolrReindexHandler
     */
    protected function getHandler() {
        return Injector::inst()->get('SolrReindexHandler');
    }

    /**
     * @return SolrReindexQueuedTest_Service
     */
    protected function getQueuedJobService() {
        return singleton('SolrReindexQueuedTest_Service');
    }

    /**
     * Test that reindex will generate a top level queued job, and executing this will perform
     * the necessary initialisation of the grouped queued jobs
     */
    public function testReindexSegmentsGroups() {
        $this->createDummyData(18);

        // Pre-existing jobs: one top-level job and two group jobs
        $jobService = $this->getQueuedJobService();
        $jobService->queueJob(new SolrReindexQueuedJob());
        $jobService->queueJob(new SolrReindexGroupQueuedJob());
        $jobService->queueJob(new SolrReindexGroupQueuedJob());

        // Trigger the reindex
        $logger = new SolrReindexTest_RecordingLogger();
        $this->getHandler()->triggerReindex($logger, 6, 'Solr_Reindex');

        // The pre-existing jobs should have been cancelled
        $this->assertEquals(1, $logger->countMessages('Cancelled 1 re-index tasks and 2 re-index groups'));
        $this->assertEquals(1, $logger->countMessages('Queued Solr Reindex Job'));

        // The next job on the queue should be the top-level job
        $job = $this->getQueuedJobService()->getNextJob();
        $this->assertInstanceOf('SolrReindexQueuedJob', $job);
        $this->assertEquals(6, $job->getBatchSize());

        // Processing it should create the group jobs
        $logger->clear();
        $job->setLogger($logger);
        $job->process();

        // Deletes are performed in the main task prior to individual groups being processed
        // 18 records means 3 groups of 6 in each variant (6 total)
        Phockito::verify($this->service, 2)
            ->deleteByQuery(anything());
        $this->assertEquals(1, $logger->countMessages('Beginning init of reindex'));
        $this->assertEquals(6, $logger->countMessages('Queued Solr Reindex Group '));
        $this->assertEquals(3, $logger->countMessages(' of SolrReindexTest_Item in {"SolrReindexTest_Variant":"0"}'));
        $this->assertEquals(3, $logger->countMessages(' of SolrReindexTest_Item in {"SolrReindexTest_Variant":"1"}'));
        $this->assertEquals(1, $logger->countMessages('Completed init of reindex'));

        // Invalid classes should be removed
        $this->assertNotEmpty($logger->getMessages('Clearing obsolete classes from SolrReindexTest_Index'));
        Phockito::verify($this->service, 1)
            ->deleteByQuery('-(ClassHierarchy:SolrReindexTest_Item)');

        // Valid classes in invalid variants should be removed
        $this->assertNotEmpty($logger->getMessages(
            'Clearing all records of type SolrReindexTest_Item in the current state: {"SolrReindexTest_Variant":"2"}'
        ));
        Phockito::verify($this->service, 1)
            ->deleteByQuery('+(ClassHierarchy:SolrReindexTest_Item) +(_testvariant:"2")');
    }

    /**
     * Test index processing on individual groups
     */
    public function testRunGroup() {
        $this->createDummyData(18);

        // Do what the SolrReindexQueuedJob would do to create each group job
        $logger = new SolrReindexTest_RecordingLogger();
        $this->getHandler()->runReindex($logger, 6, 'Solr_Reindex');

        // Group jobs should have been queued
        $this->assertEquals(6, $logger->countMessages('Queued Solr Reindex Group'));

        // The next job on the queue should be a group job
        $job = $this->getQueuedJobService()->getNextJob();
        $this->assertInstanceOf('SolrReindexGroupQueuedJob', $job);
        $this->assertEquals(
            'Solr Reindex Group (1/3) of SolrReindexTest_Item in {"SolrReindexTest_Variant":"0"}',
            $job->getTitle()
        );

        // Processing the job performs the reindex of that group
        $logger->clear();
        $job->setLogger($logger);
        $job->process();

        // Check tasks completed (as per non-queuedjob version)
        $this->assertEquals(1, $logger->countMessages('Beginning reindex group'));
        $this->assertEquals(1, $logger->countMessages('Adding SolrReindexTest_Item'));
        $this->assertEquals(1, $logger->countMessages('Queuing commit on all changes'));
        $this->assertEquals(1, $logger->countMessages('Completed reindex group'));

        // Extract the updated IDs from the log
        $idMessage = $logger->filterMessages('Updated ');
        $matched = preg_match('/^Updated (?<ids>[,\d]+)/i', $idMessage[0], $matches);
        $this->assertNotEmpty($matched);
        $ids = array_unique(explode(',', $matches['ids']));
        $this->assertEquals(6, count($ids));
        foreach($ids as $id) {
            // Every id in the first group should be a multiple of 3
            $this->assertEquals(0, $id % 3, "ID $id Should match pattern ID % 3 = 0");
        }
    }
}
|
||||
|
||||
// Stop processing this file here when QueuedJobService is unavailable:
// the class declared below extends it
if (!class_exists('QueuedJobService')) {
    return;
}
|
||||
|
||||
class SolrReindexQueuedTest_Service extends QueuedJobService implements TestOnly {

    /**
     * Take the next pending job and hand back the result of initialising it
     *
     * @return QueuedJob
     */
    public function getNextJob() {
        return $this->initialiseJob($this->getNextPendingJob());
    }

}
|
531
tests/SolrReindexTest.php
Normal file
531
tests/SolrReindexTest.php
Normal file
@ -0,0 +1,531 @@
|
||||
<?php
|
||||
|
||||
use Monolog\Handler\AbstractProcessingHandler;
|
||||
use Monolog\Handler\HandlerInterface;
|
||||
use Monolog\Logger;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
// Only pull in the hamcrest matchers when Phockito is actually installed
if (class_exists('Phockito')) {
    Phockito::include_hamcrest();
}
|
||||
|
||||
class SolrReindexTest extends SapphireTest {

    protected $usesDatabase = true;

    protected $extraDataObjects = array(
        'SolrReindexTest_Item'
    );

    /**
     * Forced index for testing
     *
     * @var SolrReindexTest_Index
     */
    protected $index = null;

    /**
     * Mock service
     *
     * @var SolrService
     */
    protected $service = null;

    /**
     * Register the test handler, enable the test variant and force the index list
     * to the single test index backed by a mocked service.
     *
     * The test is marked as skipped when Phockito is not installed.
     */
    public function setUp() {
        parent::setUp();

        if (!class_exists('Phockito')) {
            $this->skipTest = true;
            return $this->markTestSkipped("These tests need the Phockito module installed to run");
        }

        // Set test handler for reindex
        Config::inst()->update('Injector', 'SolrReindexHandler', array(
            'class' => 'SolrReindexTest_TestHandler'
        ));
        Injector::inst()->registerService(new SolrReindexTest_TestHandler(), 'SolrReindexHandler');

        // Set test variant
        SolrReindexTest_Variant::enable();

        // Set index list
        $this->service = $this->getServiceMock();
        $this->index = singleton('SolrReindexTest_Index');
        $this->index->setService($this->service);
        FullTextSearch::force_index_list($this->index);
    }

    /**
     * Populate database with dummy dataset
     *
     * @param int $number Number of records to create in each variant
     */
    protected function createDummyData($number) {
        // Populate dataobjects. Use truncate to generate predictable IDs
        DB::query('TRUNCATE "SolrReindexTest_Item"');

        // Note that we don't create any records in variant = 2, to represent a variant
        // that should be cleared without any re-indexes performed
        foreach (array(0, 1) as $variant) {
            for ($i = 1; $i <= $number; $i++) {
                $item = new SolrReindexTest_Item();
                $item->Variant = $variant;
                $item->Title = "Item $variant / $i";
                $item->write();
            }
        }
    }

    /**
     * Mock service
     *
     * @return SolrService
     */
    protected function getServiceMock() {
        return Phockito::mock('Solr4Service');
    }

    /**
     * Reset the forced index list and the test variant before the parent teardown
     */
    public function tearDown() {
        FullTextSearch::force_index_list();
        SolrReindexTest_Variant::disable();
        parent::tearDown();
    }

    /**
     * Get the reindex handler
     *
     * @return SolrReindexHandler
     */
    protected function getHandler() {
        return Injector::inst()->get('SolrReindexHandler');
    }

    /**
     * Ensure the test variant is up and running properly
     */
    public function testVariant() {
        // State defaults to 0
        $variant = SearchVariant::current_state();
        $this->assertEquals(
            array(
                "SolrReindexTest_Variant" => "0"
            ),
            $variant
        );

        // All states enumerated
        $allStates = iterator_to_array(SearchVariant::reindex_states());
        $this->assertEquals(
            array(
                array(
                    "SolrReindexTest_Variant" => "0"
                ),
                array(
                    "SolrReindexTest_Variant" => "1"
                ),
                array(
                    "SolrReindexTest_Variant" => "2"
                )
            ),
            $allStates
        );

        // Check correct items created and that filtering on variant works
        $this->createDummyData(120);
        SolrReindexTest_Variant::set_current(2);
        $this->assertEquals(0, SolrReindexTest_Item::get()->count());
        SolrReindexTest_Variant::set_current(1);
        $this->assertEquals(120, SolrReindexTest_Item::get()->count());
        SolrReindexTest_Variant::set_current(0);
        $this->assertEquals(120, SolrReindexTest_Item::get()->count());
        SolrReindexTest_Variant::disable();
        $this->assertEquals(240, SolrReindexTest_Item::get()->count());
    }

    /**
     * Given the invocation of a new re-index with a given set of data, ensure that the necessary
     * list of groups are created and segmented for each state
     *
     * Test should work fine with any variants (versioned, subsites, etc) specified
     */
    public function testReindexSegmentsGroups() {
        $this->createDummyData(120);

        // Initiate re-index
        $logger = new SolrReindexTest_RecordingLogger();
        $this->getHandler()->runReindex($logger, 21, 'Solr_Reindex');

        // Test that invalid classes are removed.
        // filterMessages() is used because getMessages() returns every message regardless of
        // the text looked for, which would make this assertion pass on any logged output.
        $this->assertNotEmpty($logger->filterMessages('Clearing obsolete classes from SolrReindexTest_Index'));
        Phockito::verify($this->service, 1)
            ->deleteByQuery('-(ClassHierarchy:SolrReindexTest_Item)');

        // Test that valid classes in invalid variants are removed
        $this->assertNotEmpty($logger->filterMessages(
            'Clearing all records of type SolrReindexTest_Item in the current state: {"SolrReindexTest_Variant":"2"}'
        ));
        Phockito::verify($this->service, 1)
            ->deleteByQuery('+(ClassHierarchy:SolrReindexTest_Item) +(_testvariant:"2")');

        // 120x2 grouped into groups of 21 results in 12 groups
        $this->assertEquals(12, $logger->countMessages('Called processGroup with '));
        $this->assertEquals(6, $logger->countMessages('{"SolrReindexTest_Variant":"0"}'));
        $this->assertEquals(6, $logger->countMessages('{"SolrReindexTest_Variant":"1"}'));

        // Given that there are two variants, there should be two group ids of each number
        for ($group = 0; $group < 6; $group++) {
            $this->assertEquals(
                2,
                $logger->countMessages(" SolrReindexTest_Item, group {$group} of 6"),
                "Group {$group} should be processed once per populated variant"
            );
        }

        // Check various group sizes: batch size => expected number of processGroup calls
        $expectedCalls = array(
            120 => 2,
            119 => 4,
            121 => 2,
            2 => 120,
        );
        foreach ($expectedCalls as $batchSize => $calls) {
            $logger->clear();
            $this->getHandler()->runReindex($logger, $batchSize, 'Solr_Reindex');
            $this->assertEquals(
                $calls,
                $logger->countMessages('Called processGroup with '),
                "Unexpected number of groups for batch size {$batchSize}"
            );
        }
    }

    /**
     * Test index processing on individual groups
     */
    public function testRunGroup() {
        $this->createDummyData(120);
        $logger = new SolrReindexTest_RecordingLogger();

        // Initiate re-index of third group (index 2 of 6)
        $state = array('SolrReindexTest_Variant' => '1');
        $this->getHandler()->runGroup($logger, $this->index, $state, 'SolrReindexTest_Item', 6, 2);

        // Fail with a clear assertion rather than an undefined-offset notice if nothing was updated
        $idMessage = $logger->filterMessages('Updated ');
        $this->assertNotEmpty($idMessage, 'runGroup should log the updated IDs');
        $this->assertNotEmpty(preg_match('/^Updated (?<ids>[,\d]+)/i', $idMessage[0], $matches));
        $ids = array_unique(explode(',', $matches['ids']));

        // Test successful (filter on the message text; getMessages() would return everything)
        $this->assertNotEmpty($logger->filterMessages('Adding SolrReindexTest_Item'));
        $this->assertNotEmpty($logger->filterMessages('Done'));

        // Test that items in this variant / group are cleared from solr
        Phockito::verify($this->service, 1)->deleteByQuery(
            '+(ClassHierarchy:SolrReindexTest_Item) +_query_:"{!frange l=2 u=2}mod(ID, 6)" +(_testvariant:"1")'
        );

        // Test that items in this variant / group are re-indexed
        // 120 divided into 6 groups should be 20 at least (max 21)
        $this->assertEquals(21, count($ids), 'Group size is about 20', 1);
        foreach ($ids as $id) {
            // Each id should be % 6 == 2
            $this->assertEquals(2, $id % 6, "ID $id Should match pattern ID % 6 = 2");
        }
    }

    /**
     * Test that running all groups covers the entire range of dataobject IDs
     */
    public function testRunAllGroups() {
        $this->createDummyData(120);
        $logger = new SolrReindexTest_RecordingLogger();

        // Test that running all groups covers the complete set of ids
        $state = array('SolrReindexTest_Variant' => '1');
        for ($i = 0; $i < 6; $i++) {
            // See testReindexSegmentsGroups for test that each of these states is invoked during a full reindex
            $this
                ->getHandler()
                ->runGroup($logger, $this->index, $state, 'SolrReindexTest_Item', 6, $i);
        }

        // Count all ids updated
        $ids = array();
        foreach ($logger->filterMessages('Updated ') as $message) {
            $this->assertNotEmpty(preg_match('/^Updated (?<ids>[,\d]+)/', $message, $matches));
            $ids = array_unique(array_merge($ids, explode(',', $matches['ids'])));
        }

        // Check ids
        $this->assertEquals(120, count($ids));
        Phockito::verify($this->service, 6)->deleteByQuery(anything());

        // Each of the six groups must have been cleared exactly once with its own frange query
        for ($group = 0; $group < 6; $group++) {
            Phockito::verify($this->service, 1)->deleteByQuery(sprintf(
                '+(ClassHierarchy:SolrReindexTest_Item) +_query_:"{!frange l=%1$d u=%1$d}mod(ID, 6)" +(_testvariant:"1")',
                $group
            ));
        }
    }
}
|
||||
|
||||
/**
 * Thin SolrReindexBase subclass used by the tests: instead of doing real work,
 * processGroup and triggerReindex only log that they were called
 */
class SolrReindexTest_TestHandler extends SolrReindexBase {

    /**
     * Log the arguments this group would have been processed with
     *
     * @param LoggerInterface $logger
     * @param SolrIndex $indexInstance
     * @param array $state
     * @param string $class
     * @param int $groups
     * @param int $group
     * @param string $taskName
     */
    public function processGroup(
        LoggerInterface $logger, SolrIndex $indexInstance, $state, $class, $groups, $group, $taskName
    ) {
        $indexName = $indexInstance->getIndexName();
        $stateName = json_encode($state);
        $message = "Called processGroup with {$indexName}, {$stateName}, {$class}, group {$group} of {$groups}";
        $logger->info($message);
    }

    /**
     * Log that a reindex was triggered
     *
     * @param LoggerInterface $logger
     * @param int $batchSize
     * @param string $taskName
     * @param mixed $classes
     */
    public function triggerReindex(LoggerInterface $logger, $batchSize, $taskName, $classes = null) {
        $message = "Called triggerReindex";
        $logger->info($message);
    }

}
|
||||
|
||||
|
||||
/**
 * Test index containing only SolrReindexTest_Item
 */
class SolrReindexTest_Index extends SolrIndex implements TestOnly {

    /**
     * Register the test item class, then add all fulltext fields
     */
    public function init() {
        $this->addClass('SolrReindexTest_Item');
        $this->addAllFulltextFields();
    }

}
|
||||
|
||||
/**
 * Test record with a Title and an integer Variant field.
 *
 * Its only extension is SolrReindexTest_ItemExtension.
 */
class SolrReindexTest_Item extends DataObject implements TestOnly {

    private static $db = array(
        'Title' => 'Varchar(255)',
        'Variant' => 'Int(0)'
    );

    private static $extensions = array(
        'SolrReindexTest_ItemExtension'
    );

}
|
||||
|
||||
/**
 * Restricts queries to records whose Variant column equals the current test variant
 */
class SolrReindexTest_ItemExtension extends DataExtension implements TestOnly {

    /**
     * Add a "Variant" condition for the current variant value.
     *
     * Nothing is added when the variant is disabled (current value is null)
     * or when the query already filters on ID.
     *
     * @param SQLQuery $query
     * @param DataQuery $dataQuery
     */
    public function augmentSQL(SQLQuery &$query, DataQuery &$dataQuery = null) {
        $variant = SolrReindexTest_Variant::get_current();
        if ($variant === null || $query->filtersOnID()) {
            return;
        }

        $sqlVariant = Convert::raw2sql($variant);
        $query->addWhere("\"Variant\" = '{$sqlVariant}'");
    }

}
|
||||
|
||||
|
||||
/**
 * Dummy search variant for SolrReindexTest_Item, indexed under the `_testvariant` filter field
 *
 * Re-index states are '0', '1' and '2'; the current value is null while the variant is disabled
 */
class SolrReindexTest_Variant extends SearchVariant implements TestOnly {

    /**
     * Value of this variant (null when disabled, otherwise the active state)
     *
     * @var int|null
     */
    protected static $current = null;

    /**
     * Activate this variant: reset all cached variant data, start in state 0 and
     * register this class as the only variant
     */
    public static function enable() {
        self::disable();

        self::$current = 0;
        $instance = singleton('SolrReindexTest_Variant');
        self::$variants = array(
            'SolrReindexTest_Variant' => $instance
        );
    }

    /**
     * Disable this variant and reset the variant caches
     */
    public static function disable() {
        self::$current = null;
        self::$variants = null;
        self::$class_variants = array();
        self::$call_instances = array();
    }

    /**
     * Switch to the given state
     *
     * @param mixed $state
     */
    public function activateState($state) {
        self::set_current($state);
    }

    /**
     * Set the current variant to the given state
     *
     * @param int $current 0, 1, 2, or null (disabled)
     */
    public static function set_current($current) {
        self::$current = $current;
    }

    /**
     * Get the current state
     *
     * @return string|null The state cast to a string, or null when disabled
     */
    public static function get_current() {
        // Always use string values for states for consistent json_encode value
        return isset(self::$current) ? (string)self::$current : null;
    }

    /**
     * Register the `_testvariant` filter field on the index, resolved through currentState()
     *
     * @param string $base
     * @param SearchIndex $index
     */
    public function alterDefinition($base, $index) {
        $lookup = array(
            'call' => 'variant',
            'variant' => get_class($this),
            'method' => 'currentState'
        );

        $index->filterFields['_testvariant'] = array(
            'name' => '_testvariant',
            'field' => '_testvariant',
            'fullfield' => '_testvariant',
            'base' => $base,
            'origin' => $base,
            'type' => 'Int',
            'lookup_chain' => array($lookup)
        );
    }

    /**
     * Filter the query on the current state
     *
     * @param SearchQuery $query
     * @param SearchIndex $index
     */
    public function alterQuery($query, $index) {
        // The filter field is simply named _testvariant, matching alterDefinition
        $state = $this->currentState();
        $query->filter('_testvariant', $state);
    }

    /**
     * Applies to SolrReindexTest_Item and, when requested, to its subclasses
     *
     * @param string $class
     * @param bool $includeSubclasses
     * @return bool
     */
    public function appliesTo($class, $includeSubclasses) {
        if ($class === 'SolrReindexTest_Item') {
            return true;
        }

        return $includeSubclasses && is_subclass_of($class, 'SolrReindexTest_Item', true);
    }

    /**
     * @return bool False while the variant is disabled (current value is null)
     */
    public function appliesToEnvironment() {
        // Set to null to disable
        return !(self::$current === null);
    }

    /**
     * @return string|null
     */
    public function currentState() {
        return self::get_current();
    }

    /**
     * @return array All states to re-index
     */
    public function reindexStates() {
        // Always use string values for states for consistent json_encode value
        $states = array('0', '1', '2');
        return $states;
    }

}
|
||||
|
||||
/**
 * Test logger for recording messages
 *
 * Every message is captured by an attached SolrReindexTest_Handler so tests can
 * inspect, filter and count what was logged.
 */
class SolrReindexTest_RecordingLogger extends Logger implements TestOnly {

    /**
     * Handler that stores the recorded messages
     *
     * @var SolrReindexTest_Handler
     */
    protected $testHandler = null;

    /**
     * @param string $name Logger channel name
     * @param array $handlers Additional handlers
     * @param array $processors Additional processors
     */
    public function __construct($name = 'testlogger', array $handlers = array(), array $processors = array()) {
        parent::__construct($name, $handlers, $processors);

        $this->testHandler = new SolrReindexTest_Handler();
        $this->pushHandler($this->testHandler);
    }

    /**
     * Get recorded messages, optionally limited to those containing a substring.
     *
     * Callers pass the text they expect to have been logged; previously that argument was
     * silently ignored, so assertNotEmpty() on the result passed whenever anything was logged.
     *
     * @param string|null $containing Optional case-insensitive substring; null or '' returns everything
     * @return array
     */
    public function getMessages($containing = null) {
        if ($containing === null || $containing === '') {
            return $this->testHandler->getMessages();
        }
        return $this->filterMessages($containing);
    }

    /**
     * Clear all messages
     */
    public function clear() {
        $this->testHandler->clear();
    }

    /**
     * Get messages with the given filter
     *
     * @param string $containing Case-insensitive substring to look for
     * @return array Filtered array, re-indexed from 0
     */
    public function filterMessages($containing) {
        // Read from the handler directly so this never depends on getMessages() filtering
        return array_values(array_filter(
            $this->testHandler->getMessages(),
            function($content) use ($containing) {
                return stripos($content, $containing) !== false;
            }
        ));
    }

    /**
     * Count all messages containing the given substring
     *
     * @param string $containing Message to filter by; any falsy value counts every message
     * @return int
     */
    public function countMessages($containing = null) {
        if ($containing) {
            $messages = $this->filterMessages($containing);
        } else {
            $messages = $this->testHandler->getMessages();
        }
        return count($messages);
    }
}
|
||||
|
||||
/**
 * Log handler that keeps the text of every record it is asked to write, for later retrieval
 */
class SolrReindexTest_Handler extends AbstractProcessingHandler implements TestOnly {

    /**
     * Recorded message strings, in the order they were written
     *
     * @var array
     */
    protected $messages = array();

    /**
     * Get all messages
     *
     * @return array
     */
    public function getMessages() {
        return $this->messages;
    }

    /**
     * Forget every recorded message
     */
    public function clear() {
        $this->messages = array();
    }

    /**
     * Store the 'message' entry of the given record
     *
     * @param array $record
     */
    protected function write(array $record) {
        $text = $record['message'];
        $this->messages[] = $text;
    }

}
|
Loading…
Reference in New Issue
Block a user