From 4db3b6b0e9e293f015a4dd187b03bd96a1fdcd93 Mon Sep 17 00:00:00 2001 From: Garion Herman Date: Mon, 30 Mar 2020 11:19:22 +1300 Subject: [PATCH] NEW Respect canView permissions during index operations The module now calls canView() on documents to determine whether they should exist in the index at all, no longer solely relying on this check occurring post-search. This decreases the likelihood of data leakage, and should result in more accurate search result pagination. --- src/Search/FullTextSearch.php | 40 ++++++++++++++++- .../Processors/SearchUpdateProcessor.php | 16 ++++++- src/Search/Services/IndexableService.php | 43 ++++++++++++++++--- src/Solr/Reindex/Handlers/SolrReindexBase.php | 13 +++++- tests/SolrIndexTest.php | 7 +++ 5 files changed, 109 insertions(+), 10 deletions(-) diff --git a/src/Search/FullTextSearch.php b/src/Search/FullTextSearch.php index c81ec5b..69ce053 100644 --- a/src/Search/FullTextSearch.php +++ b/src/Search/FullTextSearch.php @@ -15,7 +15,7 @@ class FullTextSearch { protected static $all_indexes = null; - protected static $indexes_by_subclass = array(); + protected static $indexes_by_subclass = []; /** * Optional list of index names to limit to. If left empty, all subclasses of SearchIndex @@ -24,7 +24,19 @@ class FullTextSearch * @var array * @config */ - private static $indexes = array(); + private static $indexes = []; + + /** + * During (re)index of items, the canView method for the item is called against an anonymous user, + * in order to determine whether the item should be included in the index. This is a preventative + * security measure, and has a performance cost. If you are confident that a given DataObject should + * be visible to everyone, or you have other measures in place to secure the contents of the index, + * you can disable this check for that specific class and its descendants. + * + * @var array Should contain FQCNs of classes the check is to be disabled on (e.g. SiteTree::class) + * @config + */ + private static $disable_preindex_canview_check = []; /** * Get all the instantiable search indexes (so all the user created indexes, but not the connector or library level @@ -143,4 +155,28 @@ class FullTextSearch self::$all_indexes[$class] = $index; } } + + /** + * Uses the disable_preindex_canview_check configuration to determine whether the given class should have the + * canView check applied. + * + * @param string $class The FQCN of the class to check + * @return bool + */ + public static function isCanViewCheckDisabledForClass(string $class): bool + { + $disabledCheckClasses = Config::inst()->get(self::class, 'disable_preindex_canview_check'); + + if (empty($disabledCheckClasses)) { + return false; + } + + foreach ($disabledCheckClasses as $disabledCheckClass) { + if ($disabledCheckClass === $class || in_array($disabledCheckClass, class_parents($class))) { + return true; + } + } + + return false; + } } diff --git a/src/Search/Processors/SearchUpdateProcessor.php b/src/Search/Processors/SearchUpdateProcessor.php index de64dfc..4c2c7ca 100644 --- a/src/Search/Processors/SearchUpdateProcessor.php +++ b/src/Search/Processors/SearchUpdateProcessor.php @@ -6,6 +6,7 @@ use SilverStripe\FullTextSearch\Search\Services\IndexableService; use SilverStripe\ORM\DataObject; use SilverStripe\FullTextSearch\Search\Variants\SearchVariant; use SilverStripe\FullTextSearch\Search\FullTextSearch; +use SilverStripe\Security\InheritedPermissions; abstract class SearchUpdateProcessor { @@ -73,6 +74,7 @@ abstract class SearchUpdateProcessor $dirty = $this->getSource(); $indexes = FullTextSearch::get_indexes(); $indexableService = IndexableService::singleton(); + foreach ($dirty as $base => $statefulids) { if (!$statefulids) { continue; @@ -86,10 +88,21 @@ abstract class SearchUpdateProcessor // Ensure that indexes for all new / updated objects are included $objs = DataObject::get($base)->byIDs(array_keys($ids)); + + // Warm the InheritedPermissions cache for the model before calling isIndexable (if applicable) + // TODO: Inline this into IndexableService::areIndexable() method? + if (method_exists($base, 'getPermissionChecker')) { + $base::singleton()->getPermissionChecker()->prePopulatePermissionCache( + InheritedPermissions::VIEW, + array_keys($ids) + ); + } + + /** @var DataObject $obj */ foreach ($objs as $obj) { foreach ($ids[$obj->ID] as $index) { if (!$indexes[$index]->variantStateExcluded($state)) { - // Remove any existing records from index if ShowInSearch is changed to false + // Remove any existing data from index if the object is no longer indexable if (!$indexableService->isIndexable($obj)) { $indexes[$index]->delete($base, $obj->ID, $state); } else { @@ -98,6 +111,7 @@ abstract class SearchUpdateProcessor $dirtyIndexes[$index] = $indexes[$index]; } } + unset($ids[$obj->ID]); } diff --git a/src/Search/Services/IndexableService.php b/src/Search/Services/IndexableService.php index 8f56129..4940123 100644 --- a/src/Search/Services/IndexableService.php +++ b/src/Search/Services/IndexableService.php @@ -4,27 +4,40 @@ namespace SilverStripe\FullTextSearch\Search\Services; use SilverStripe\Core\Extensible; use SilverStripe\Core\Injector\Injectable; +use SilverStripe\FullTextSearch\Search\FullTextSearch; use SilverStripe\ORM\DataObject; -use SilverStripe\ORM\Tests\MySQLDatabaseTest\Data; +use SilverStripe\Security\Member; /** - * Checks if a DataObject is publically viewable thus able to be added or retrieved from a publically searchable index - * Caching results because these checks may be done multiple times as there a few different code paths that search - * results might follow in real-world search implementations + * Checks if a DataObject is publicly viewable, thus able to be added to or retrieved from a publicly searchable index. + * Results are cached because these checks may be run multiple times, as there a few different code paths that search + * results might follow in real-world search implementations. */ class IndexableService { - use Injectable; use Extensible; protected $cache = []; + /** + * Clears the internal indexable cache + */ public function clearCache(): void { $this->cache = []; } + /** + * Checks and caches whether the given DataObject can be indexed. This is determined by two factors: + * + * - Whether the ShowInSearch property / getShowInSearch method evaluates to true + * - Whether the canView method evaluates to true against an anonymous user (optional, can be disabled) + * + * @see FullTextSearch::isCanViewCheckDisabledForClass() + * @param DataObject $obj + * @return bool + */ public function isIndexable(DataObject $obj): bool { // check if is a valid DataObject that has been persisted to the database @@ -44,11 +57,31 @@ class IndexableService $value = false; } + // Run canView check as anonymous user if it hasn't been disabled for this DataObject + $objClass = $obj->getClassName(); + if (!FullTextSearch::isCanViewCheckDisabledForClass($objClass)) { + $value = $value && Member::actAs(null, function () use ($obj, $objClass) { + // Attempt to use optimised permission checker if present. + // NOTE: This reduces the scope of the check to DB-based permissions (custom canView logic is ignored) + if (method_exists($objClass, 'getPermissionChecker')) { + return $objClass::singleton()->getPermissionChecker()->canView($obj->ID); + } + + return $obj->canView(); + }); + } + $this->extend('updateIsIndexable', $obj, $value); $this->cache[$key] = $value; return $value; } + /** + * Generates a unique key to cache each DataObject with. Can be extended via updateCacheKey. + * + * @param DataObject $obj + * @return string + */ protected function getCacheKey(DataObject $obj): string { $key = $obj->ClassName . '_' . $obj->ID; diff --git a/src/Solr/Reindex/Handlers/SolrReindexBase.php b/src/Solr/Reindex/Handlers/SolrReindexBase.php index b022734..498283d 100644 --- a/src/Solr/Reindex/Handlers/SolrReindexBase.php +++ b/src/Solr/Reindex/Handlers/SolrReindexBase.php @@ -12,6 +12,7 @@ use SilverStripe\FullTextSearch\Search\Queries\SearchQuery; use SilverStripe\ORM\DataObject; use SilverStripe\ORM\DataList; use SilverStripe\ORM\DB; +use SilverStripe\Security\InheritedPermissions; /** * Base class for re-indexing of solr content @@ -238,10 +239,18 @@ abstract class SolrReindexBase implements SolrReindexHandler $items = $items->filter('ClassName', $class); } + // Warm the InheritedPermissions cache for the model before calling isIndexable (if applicable) + // TODO: Inline this into IndexableService::areIndexable() method? + if (method_exists($baseClass, 'getPermissionChecker')) { + $baseClass::singleton()->getPermissionChecker()->prePopulatePermissionCache( + InheritedPermissions::VIEW, + $items->column('ID') + ); + } + $indexableService = IndexableService::singleton(); - // ShowInSearch filter - // we cannot use $items->remove($item), as that deletes the record from the database + // Filter out objects that must not be indexed $idsToRemove = []; foreach ($items as $item) { if (!$indexableService->isIndexable($item)) { diff --git a/tests/SolrIndexTest.php b/tests/SolrIndexTest.php index e8f4efe..e25e31b 100644 --- a/tests/SolrIndexTest.php +++ b/tests/SolrIndexTest.php @@ -416,6 +416,13 @@ class SolrIndexTest extends SapphireTest ->setMethods(['addDocument', 'deleteById']) ->getMock(); + // We only want to test the ShowInSearch part of IndexableService, so we disable the canView check + Config::modify()->set( + FullTextSearch::class, + 'disable_preindex_canview_check', + [DataObject::class] + ); + $index = new SolrIndexTest_ShowInSearchIndex(); $index->setService($serviceMock); FullTextSearch::force_index_list($index);