get('class_delimiter') ?? '', $source ?? '');

        return $source[0];
    }

    /**
     * Builds the index definition.
     *
     * Runs the parent constructor, then init() (where the concrete index declares its classes and
     * fields), then gives every search variant applicable to each class a chance to alter the
     * definition, and finally builds the dependency list.
     */
    public function __construct()
    {
        parent::__construct();
        $this->init();

        foreach ($this->getClasses() as $class => $options) {
            SearchVariant::with($class, $options['include_children'])->call('alterDefinition', $class, $this);
        }

        $this->buildDependancyList();
    }

    /**
     * @return string - A human readable label containing the concrete index class name
     */
    public function __toString()
    {
        return 'Search Index ' . get_class($this);
    }

    /**
     * Examines the classes this index is built on to try and find defined fields in the class hierarchy
     * for those classes.
     * Looks for db and viewable-data fields, although can't necessarily find type for viewable-data fields.
     * If multiple classes have a relation with the same name all of these will be included in the search index
     * Note that only classes that have the relations uninherited (defined in them) will be listed
     * this is because inherited relations do not need to be processed by index explicitly
     *
     * @param string $field - The field name, optionally prefixed with dot-separated relation names
     * @param string|null $forceType - When truthy, used as the 'type' instead of the detected one
     * @param array $extraOptions - Stored untouched under 'extra_options' of every definition
     * @return array - Field definitions keyed by "{origin}_{fullfield}". Each definition has the keys
     *                 name, field, fullfield, base, origin, class, lookup_chain, type, multi_valued
     *                 and extra_options. Empty when a relation in the path can't be resolved.
     */
    public function fieldData($field, $forceType = null, $extraOptions = [])
    {
        $fullfield = str_replace(".", "_", $field ?? '');
        $sources = $this->getClasses();

        foreach ($sources as $source => $options) {
            $sources[$source]['base'] = DataObject::getSchema()->baseDataClass($source);
            $sources[$source]['lookup_chain'] = [];
        }

        $found = [];

        if (strpos($field ?? '', '.') !== false) {
            $lookups = explode(".", $field ?? '');
            // The last segment is the actual field, everything before it is a relation to traverse
            $field = array_pop($lookups);

            foreach ($lookups as $lookup) {
                $next = [];

                foreach ($sources as $source => $baseOptions) {
                    $source = $this->getSourceName($source);

                    foreach (SearchIntrospection::hierarchy($source, $baseOptions['include_children']) as $dataclass) {
                        $class = null;
                        $options = $baseOptions;
                        $singleton = singleton($dataclass);
                        $schema = DataObject::getSchema();
                        $className = $singleton->getClassName();

                        if ($hasOne = $schema->hasOneComponent($className, $lookup)) {
                            // we only want to include base class for relation, omit classes that inherited the relation
                            $relationList = Config::inst()->get($dataclass, 'has_one', Config::UNINHERITED) ?? [];
                            if (!array_key_exists($lookup, $relationList)) {
                                continue;
                            }

                            $class = $hasOne;
                            $options['lookup_chain'][] = [
                                'call' => 'method',
                                'method' => $lookup,
                                'through' => 'has_one',
                                'class' => $dataclass,
                                'otherclass' => $class,
                                'foreignkey' => "{$lookup}ID",
                            ];
                        } elseif ($hasMany = $schema->hasManyComponent($className, $lookup)) {
                            // we only want to include base class for relation, omit classes that inherited the relation
                            $relationList = Config::inst()->get($dataclass, 'has_many', Config::UNINHERITED) ?? [];
                            if (!array_key_exists($lookup, $relationList)) {
                                continue;
                            }

                            $class = $hasMany;
                            $options['multi_valued'] = true;
                            $options['lookup_chain'][] = [
                                'call' => 'method',
                                'method' => $lookup,
                                'through' => 'has_many',
                                'class' => $dataclass,
                                'otherclass' => $class,
                                'foreignkey' => $schema->getRemoteJoinField($className, $lookup, 'has_many'),
                            ];
                        } elseif ($manyMany = $schema->manyManyComponent($className, $lookup)) {
                            // we only want to include base class for relation, omit classes that inherited the relation
                            $relationList = Config::inst()->get($dataclass, 'many_many', Config::UNINHERITED) ?? [];
                            if (!array_key_exists($lookup, $relationList)) {
                                continue;
                            }

                            $class = $manyMany['childClass'];
                            $options['multi_valued'] = true;
                            $options['lookup_chain'][] = [
                                'call' => 'method',
                                'method' => $lookup,
                                'through' => 'many_many',
                                'class' => $dataclass,
                                'otherclass' => $class,
                                'details' => $manyMany,
                            ];
                        }

                        if (is_string($class) && $class) {
                            if (!isset($options['origin'])) {
                                $options['origin'] = $dataclass;
                            }

                            // we add suffix here to prevent the relation to be overwritten by other instances
                            // all sources lookups must clean the source name before reading it via getSourceName()
                            $next[$class . self::config()->get('class_delimiter') . $dataclass] = $options;
                        }
                    }
                }

                if (!$next) {
                    return $next;
                } // Early out to avoid excessive empty looping

                $sources = $next;
            }
        }

        foreach ($sources as $class => $options) {
            $class = $this->getSourceName($class);
            $dataclasses = SearchIntrospection::hierarchy($class, $options['include_children']);

            while (count($dataclasses ?? [])) {
                $dataclass = array_shift($dataclasses);
                $type = null;
                $fieldoptions = $options;

                // NOTE(review): fields are looked up on $class rather than on the $dataclass being
                // visited - confirm this is intended before changing it.
                $fields = DataObject::getSchema()->databaseFields($class);

                if (isset($fields[$field])) {
                    $type = $fields[$field];
                    $fieldoptions['lookup_chain'][] = ['call' => 'property', 'property' => $field];
                } else {
                    $singleton = singleton($dataclass);

                    if ($singleton->hasMethod("get$field") || $singleton->hasField($field)) {
                        $type = $singleton->castingClass($field);
                        if (!$type) {
                            $type = 'String';
                        }

                        if ($singleton->hasMethod("get$field")) {
                            $fieldoptions['lookup_chain'][] = ['call' => 'method', 'method' => "get$field"];
                        } else {
                            $fieldoptions['lookup_chain'][] = ['call' => 'property', 'property' => $field];
                        }
                    }
                }

                if ($type) {
                    // Don't search through child classes of a class we matched on. TODO: Should we?
                    $dataclasses = array_diff(
                        $dataclasses ?? [],
                        array_values(ClassInfo::subclassesFor($dataclass) ?? [])
                    );

                    // Trim arguments off the type string
                    if (preg_match('/^(\w+)\(/', $type ?? '', $match)) {
                        $type = $match[1];
                    }

                    // Get the origin
                    $origin = isset($fieldoptions['origin']) ? $fieldoptions['origin'] : $dataclass;

                    $found["{$origin}_{$fullfield}"] = [
                        'name' => "{$origin}_{$fullfield}",
                        'field' => $field,
                        'fullfield' => $fullfield,
                        'base' => $fieldoptions['base'],
                        'origin' => $origin,
                        'class' => $dataclass,
                        'lookup_chain' => $fieldoptions['lookup_chain'],
                        'type' => $forceType ? $forceType : $type,
                        'multi_valued' => isset($fieldoptions['multi_valued']) ? true : false,
                        'extra_options' => $extraOptions,
                    ];
                }
            }
        }

        return $found;
    }

    /** Public, but should only be altered by variants */

    protected $classes = [];

    protected $fulltextFields = [];

    public $filterFields = [];

    protected $sortFields = [];

    protected $excludedVariantStates = [];

    /**
     * Add a DataObject subclass whose instances should be included in this index
     *
     * Can only be called when addFulltextField, addFilterField, addSortField and addAllFulltextFields have not
     * yet been called for this index instance
     *
     * @throws Exception
     * @param string $class - The class to include
     * @param array $options - TODO: Remove
     */
    public function addClass($class, $options = [])
    {
        if ($this->fulltextFields || $this->filterFields || $this->sortFields) {
            throw new Exception('Can\'t add class to Index after fields have already been added');
        }

        // Caller supplied options win over the defaults
        $options = array_merge([
            'include_children' => true,
        ], $options);

        $this->classes[$class] = $options;
    }

    /**
     * Get the classes added by addClass
     *
     * @return array - Options arrays keyed by class name
     */
    public function getClasses()
    {
        return $this->classes;
    }

    /**
     * Add a field that should be fulltext searchable
     * @param string $field - The field to add
     * @param string $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
     * @param array $extraOptions - Dependent on search implementation
     */
    public function addFulltextField($field, $forceType = null, $extraOptions = [])
    {
        $this->fulltextFields = array_merge(
            $this->fulltextFields,
            $this->fieldData($field, $forceType, $extraOptions)
        );
    }

    /**
     * @return array - The field definitions added as fulltext fields
     */
    public function getFulltextFields()
    {
        return $this->fulltextFields;
    }

    /**
     * Add a field that should be filterable
     * @param string $field - The field to add
     * @param string $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
     * @param array $extraOptions - Dependent on search implementation
     */
    public function addFilterField($field, $forceType = null, $extraOptions = [])
    {
        $this->filterFields = array_merge(
            $this->filterFields,
            $this->fieldData($field, $forceType, $extraOptions)
        );
    }

    /**
     * @return array - The field definitions added as filter fields
     */
    public function getFilterFields()
    {
        return $this->filterFields;
    }

    /**
     * Add a field that should be sortable
     * @param string $field - The field to add
     * @param string $forceType - The type to force this field as (required in some cases, when not detectable from metadata)
     * @param array $extraOptions - Dependent on search implementation
     */
    public function addSortField($field, $forceType = null, $extraOptions = [])
    {
        $this->sortFields = array_merge(
            $this->sortFields,
            $this->fieldData($field, $forceType, $extraOptions)
        );
    }

    /**
     * @return array - The field definitions added as sort fields
     */
    public function getSortFields()
    {
        return $this->sortFields;
    }

    /**
     * Add all database-backed text fields as fulltext searchable fields.
     *
     * For every class included in the index, examines those classes and all subclasses looking for "Text" database
     * fields (Varchar, Text, HTMLText, etc) and adds them all as fulltext searchable fields.
     *
     * @param bool $includeSubclasses - Passed on to SearchIntrospection::hierarchy()
     */
    public function addAllFulltextFields($includeSubclasses = true)
    {
        foreach ($this->getClasses() as $class => $options) {
            $classHierarchy = SearchIntrospection::hierarchy($class, $includeSubclasses, true);

            foreach ($classHierarchy as $dataClass) {
                $fields = DataObject::getSchema()->databaseFields($dataClass);

                foreach ($fields as $field => $type) {
                    // Only the class part of the spec is needed, its arguments are ignored
                    list($type) = ClassInfo::parse_class_spec($type);

                    /** @var DBField $object */
                    $object = Injector::inst()->get($type, false, ['Name' => 'test']);
                    if ($object instanceof DBString) {
                        $this->addFulltextField($field);
                    }
                }
            }
        }
    }

    /**
     * Returns an iterator that will let you iterate through all added fields, regardless of whether they
     * were added as fulltext, filter or sort fields.
     *
     * @return MultipleArrayIterator
     */
    public function getFieldsIterator()
    {
        return new MultipleArrayIterator($this->fulltextFields, $this->filterFields, $this->sortFields);
    }

    /**
     * Register a variant state that this index should ignore
     *
     * @param array $state - Map of variant => state, compared against states in variantStateExcluded()
     */
    public function excludeVariantState($state)
    {
        $this->excludedVariantStates[] = $state;
    }

    /**
     * Returns true if some variant state should be ignored
     *
     * A state is ignored when every variant => state pair of at least one excluded state is present
     * (loosely equal) in the given state.
     *
     * @param array $state - Map of variant => state
     * @return bool
     */
    public function variantStateExcluded($state)
    {
        foreach ($this->excludedVariantStates as $excludedstate) {
            $matches = true;

            foreach ($excludedstate as $variant => $variantstate) {
                if (!isset($state[$variant]) || $state[$variant] != $variantstate) {
                    $matches = false;
                    break;
                }
            }

            if ($matches) {
                return true;
            }
        }

        // Explicit negative result instead of implicitly returning null
        return false;
    }

    public $dependancyList = [];

    /**
     * Rebuilds $dependancyList from the indexed classes plus the 'class' of every added field
     */
    public function buildDependancyList()
    {
        $this->dependancyList = array_keys($this->getClasses() ?? []);

        foreach ($this->getFieldsIterator() as $name => $field) {
            if (!isset($field['class'])) {
                continue;
            }
            SearchIntrospection::add_unique_by_ancestor($this->dependancyList, $field['class']);
        }
    }

    public $derivedFields = null;

    /**
     * Returns an array where each member is all the fields and the classes that are at the end of some
     * specific lookup chain from one of the base classes
     *
     * The result is computed once and cached in $derivedFields. Each member has the keys base, fields,
     * classes and chain, where chain is the reversed lookup chain without the final field step.
     *
     * @return array
     */
    public function getDerivedFields()
    {
        if ($this->derivedFields === null) {
            $this->derivedFields = [];

            foreach ($this->getFieldsIterator() as $name => $field) {
                // A chain shorter than two steps has no relation in it, so nothing is derived
                if (count($field['lookup_chain'] ?? []) < 2) {
                    continue;
                }

                $key = sha1($field['base'] . serialize($field['lookup_chain']));
                $fieldname = "{$field['class']}:{$field['field']}";

                if (isset($this->derivedFields[$key])) {
                    $this->derivedFields[$key]['fields'][$fieldname] = $fieldname;
                    // Must target this derivation's class list; writing to the top level would add a
                    // malformed 'classes' entry to the result instead
                    SearchIntrospection::add_unique_by_ancestor(
                        $this->derivedFields[$key]['classes'],
                        $field['class']
                    );
                } else {
                    $chain = array_reverse($field['lookup_chain'] ?? []);
                    // Drop the final step (the field itself), leaving only the relation steps
                    array_shift($chain);

                    $this->derivedFields[$key] = [
                        'base' => $field['base'],
                        'fields' => [$fieldname => $fieldname],
                        'classes' => [$field['class']],
                        'chain' => $chain,
                    ];
                }
            }
        }

        return $this->derivedFields;
    }

    /**
     * Get the "document ID" (a database & variant unique id) given some "Base" class, DataObject ID and state array
     *
     * @param string $base - The base class of the object
     * @param integer $id - The ID of the object
     * @param array $state - The variant state of the object
     * @return string - The document ID as a string
     */
    public function getDocumentIDForState($base, $id, $state)
    {
        // Sort by key so the same state always encodes to the same string
        ksort($state);
        $parts = ['id' => $id, 'base' => $base, 'state' => json_encode($state)];

        return implode('-', array_values($parts ?? []));
    }

    /**
     * Get the "document ID" (a database & variant unique id) given some "Base" class and DataObject
     *
     * @param DataObject $object - The object
     * @param string $base - The base class of the object
     * @param boolean $includesubs - TODO: Probably going away
     * @return string - The document ID as a string
     */
    public function getDocumentID($object, $base, $includesubs)
    {
        return $this->getDocumentIDForState($base, $object->ID, SearchVariant::current_state($base, $includesubs));
    }

    /**
     * Given an object and a field definition (as returned by fieldData) get the current value of that field on that object
     *
     * While the chain is walked, PHP errors (other than deprecations) are turned into exceptions; any
     * Exception is passed to warn() and results in null. The previous error handler is always restored.
     *
     * @param DataObject $object - The object to get the value from
     * @param array $field - The field definition to use
     * @return mixed - The value of the field, or null if we couldn't look it up for some reason
     */
    protected function _getFieldValue($object, $field)
    {
        $errorHandler = function ($no, $str) {
            throw new Exception('HTML Parse Error: ' . $str);
        };
        set_error_handler($errorHandler, E_ALL & ~(E_DEPRECATED | E_USER_DEPRECATED));

        try {
            foreach ($field['lookup_chain'] as $step) {
                // Just fail if we've fallen off the end of the chain
                if (!$object) {
                    return null;
                }

                // If we're looking up this step on an array or SS_List, do the step on every item, merge result
                if (is_array($object) || $object instanceof SS_List) {
                    $next = [];

                    foreach ($object as $item) {
                        if ($step['call'] == 'method') {
                            $method = $step['method'];
                            $item = $item->$method();
                        } else {
                            $property = $step['property'];
                            $item = $item->$property;
                        }

                        if ($item instanceof SS_List) {
                            $next = array_merge($next, $item->toArray());
                        } elseif (is_array($item)) {
                            $next = array_merge($next, $item);
                        } else {
                            $next[] = $item;
                        }
                    }

                    $object = $next;
                } else {
                    // Otherwise, just call
                    if ($step['call'] == 'method') {
                        $method = $step['method'];
                        $object = $object->$method();
                    } elseif ($step['call'] == 'variant') {
                        $variants = SearchVariant::variants();
                        $variant = $variants[$step['variant']];
                        $method = $step['method'];
                        $object = $variant->$method($object);
                    } else {
                        $property = $step['property'];
                        $object = $object->$property;
                    }
                }
            }
        } catch (Exception $e) {
            static::warn($e);
            $object = null;
        } finally {
            // Runs on every exit path (including the early return above), so the temporary
            // handler never stays installed after this method
            restore_error_handler();
        }

        return $object;
    }

    /**
     * Log non-fatal errors
     *
     * @param Exception $e
     */
    public static function warn($e)
    {
        Injector::inst()->get(LoggerInterface::class)->info($e);
    }

    /**
     * Given a class, object id, set of stateful ids and a list of changed fields (in a special format),
     * return what statefulids need updating in this index
     *
     * Internal function used by SearchUpdater.
     *
     * @param string $class
     * @param int $id
     * @param array $statefulids
     * @param array $fields
     * @return array - Stateful ids keyed by base class, then by the serialized stateful id
     */
    public function getDirtyIDs($class, $id, $statefulids, $fields)
    {
        $dirty = [];

        // First, if this object is directly contained in the index, add it
        foreach ($this->classes as $searchclass => $options) {
            if ($searchclass == $class || ($options['include_children'] && is_subclass_of($class, $searchclass ?? ''))) {
                $base = DataObject::getSchema()->baseDataClass($searchclass);
                $dirty[$base] = [];

                foreach ($statefulids as $statefulid) {
                    $key = serialize($statefulid);
                    $dirty[$base][$key] = $statefulid;
                }
            }
        }

        $current = SearchVariant::current_state();

        // Then, for every derived field
        foreach ($this->getDerivedFields() as $derivation) {
            // If the this object is a subclass of any of the classes we want a field from
            if (!SearchIntrospection::is_subclass_of($class, $derivation['classes'])) {
                continue;
            }

            if (!array_intersect_key($fields ?? [], $derivation['fields'])) {
                continue;
            }

            foreach (SearchVariant::reindex_states($class, false) as $state) {
                SearchVariant::activate_state($state);

                $ids = [$id];

                // Walk the relation chain backwards from the changed object to the indexed objects
                foreach ($derivation['chain'] as $step) {
                    if ($step['through'] == 'has_one') {
                        $ids = DataObject::get($step['class'])
                            ->filter($step['foreignkey'], $ids)
                            ->column('ID');
                    } elseif ($step['through'] == 'has_many') {
                        // foreignkey identifies a has_one column on the model linked via the has_many relation
                        $ids = DataObject::get($step['otherclass'])
                            ->filter('ID', $ids)
                            ->column($step['foreignkey']);
                    }

                    if (empty($ids)) {
                        break;
                    }
                }

                SearchVariant::activate_state($current);

                if ($ids) {
                    $base = $derivation['base'];
                    if (!isset($dirty[$base])) {
                        $dirty[$base] = [];
                    }

                    foreach ($ids as $rid) {
                        $statefulid = ['id' => $rid, 'state' => $state];
                        $key = serialize($statefulid);
                        $dirty[$base][$key] = $statefulid;
                    }
                }
            }
        }

        return $dirty;
    }

    /** !! These should be implemented by the full text search engine */

    abstract public function add($object);

    abstract public function delete($base, $id, $state);

    abstract public function commit();

    /** !! These should be implemented by the specific index */

    /**
     * Called during construction, this is the method that builds the structure.
     * Used instead of overriding __construct as we have specific execution order - code that has
     * to be run before _and/or_ after this.
     */
    abstract public function init();
}