<?php

Solr::include_client_api();

/**
 * Search index implementation backed by a Solr service.
 *
 * Provides generation of Solr schema fragments (field, copyField and analyzer
 * definitions), conversion of objects into Apache_Solr_Document instances, and
 * translation of a SearchQuery into a Solr request.
 */
abstract class SolrIndex extends SearchIndex
{
    /**
     * Maps field types to Solr field types for fulltext fields.
     * The '*' entry is the fallback for types that are not listed.
     *
     * @var array
     */
    public static $fulltextTypeMap = array(
        '*' => 'text',
        'HTMLVarchar' => 'htmltext',
        'HTMLText' => 'htmltext'
    );

    /**
     * Maps field types to Solr field types for filter and sort fields.
     * The '*' entry is the fallback for types that are not listed.
     *
     * @var array
     */
    public static $filterTypeMap = array(
        '*' => 'string',
        'Boolean' => 'boolean',
        'Date' => 'tdate',
        'SSDatetime' => 'tdate',
        'SS_Datetime' => 'tdate',
        'ForeignKey' => 'tint',
        'Int' => 'tint',
        'Float' => 'tfloat',
        'Double' => 'tdouble'
    );

    /**
     * @var array
     */
    public static $sortTypeMap = array();

    /**
     * Analyzer definitions keyed by full field name, then by analyzer type.
     *
     * @var array
     */
    protected $analyzerFields = array();

    /**
     * Copy field definitions keyed by source field name.
     *
     * @var array
     */
    protected $copyFields = array();

    /**
     * Index-specific override for the extras path (see getExtrasPath()).
     *
     * @var string|null
     */
    protected $extrasPath = null;

    /**
     * Index-specific override for the templates path (see getTemplatesPath()).
     *
     * @var string|null
     */
    protected $templatesPath = null;

    /**
     * List of boosted fields
     *
     * @var array
     */
    protected $boostedFields = array();

    /**
     * Lazily created service instance (see getService()).
     *
     * @var SolrService
     */
    protected $service;

    /**
     * Name of default field
     *
     * @var string
     * @config
     */
    private static $default_field = '_text';

    /**
     * @return String Absolute path to the folder containing
     * templates which are used for generating the schema and field definitions.
     * Falls back to the 'templatespath' entry of Solr::solr_options() when no
     * index-specific path is set.
     */
    public function getTemplatesPath()
    {
        $globalOptions = Solr::solr_options();
        return $this->templatesPath ? $this->templatesPath : $globalOptions['templatespath'];
    }

    /**
     * @return String Absolute path to the configuration default files,
     * e.g. solrconfig.xml. Falls back to the 'extraspath' entry of
     * Solr::solr_options() when no index-specific path is set.
     */
    public function getExtrasPath()
    {
        $globalOptions = Solr::solr_options();
        return $this->extrasPath ? $this->extrasPath : $globalOptions['extraspath'];
    }

    /**
     * Render this index with the schema.ss template from the templates path.
     *
     * @return mixed Result of renderWith()
     */
    public function generateSchema()
    {
        return $this->renderWith($this->getTemplatesPath() . '/schema.ss');
    }

    /**
     * @return string The class name of this index, used as its name
     */
    public function getIndexName()
    {
        return get_class($this);
    }

    /**
     * Render this index with the types.ss template from the templates path.
     *
     * @return mixed Result of renderWith()
     */
    public function getTypes()
    {
        return $this->renderWith($this->getTemplatesPath() . '/types.ss');
    }

    /**
     * Index-time analyzer which is applied to a specific field.
     * Can be used to remove HTML tags, apply stemming, etc.
     *
     * @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WhitespaceTokenizerFactory
     *
     * @param String $field
     * @param String $type
     * @param Array $params Parameters for the analyzer, usually at least a "class"
     */
    public function addAnalyzer($field, $type, $params)
    {
        $fullFields = $this->fieldData($field);
        if (!$fullFields) {
            return;
        }

        // Register the analyzer against every full field name the given field resolves to
        foreach ($fullFields as $fullField => $spec) {
            if (!isset($this->analyzerFields[$fullField])) {
                $this->analyzerFields[$fullField] = array();
            }
            $this->analyzerFields[$fullField][$type] = $params;
        }
    }

    /**
     * Get the default text field, normally '_text'
     *
     * @return string
     */
    public function getDefaultField()
    {
        return $this->config()->default_field;
    }

    /**
     * Build the <field> definitions for this index: the hardcoded fields, the
     * default fulltext collation field, then all fulltext, filter and sort fields.
     *
     * @return string XML fragments joined by a newline and two tabs
     */
    public function getFieldDefinitions()
    {
        $xml = array();
        $stored = $this->getStoredDefault();

        $xml[] = "";

        // Add the hardcoded field definitions
        $xml[] = "<field name='_documentid' type='string' indexed='true' stored='true' required='true' />";
        $xml[] = "<field name='ID' type='tint' indexed='true' stored='true' required='true' />";
        $xml[] = "<field name='ClassName' type='string' indexed='true' stored='true' required='true' />";
        $xml[] = "<field name='ClassHierarchy' type='string' indexed='true' stored='true' required='true' multiValued='true' />";

        // Add the fulltext collation field
        $df = $this->getDefaultField();
        $xml[] = "<field name='{$df}' type='htmltext' indexed='true' stored='{$stored}' multiValued='true' />" ;

        // Add the user-specified fields
        foreach ($this->fulltextFields as $name => $field) {
            $xml[] = $this->getFieldDefinition($name, $field, self::$fulltextTypeMap);
        }

        // ID and ClassName are already covered by the hardcoded definitions above
        foreach ($this->filterFields as $name => $field) {
            if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') {
                continue;
            }
            $xml[] = $this->getFieldDefinition($name, $field);
        }

        foreach ($this->sortFields as $name => $field) {
            if ($field['fullfield'] == 'ID' || $field['fullfield'] == 'ClassName') {
                continue;
            }
            $xml[] = $this->getFieldDefinition($name, $field);
        }

        return implode("\n\t\t", $xml);
    }

    /**
     * Extract first suggestion text from collated values
     *
     * @param mixed $collation Either a string, or an object exposing
     *                         a misspellingsAndCorrections collection
     * @return string The string itself, the first correction, or '' if neither applies
     */
    protected function getCollatedSuggestion($collation = '')
    {
        if (is_string($collation)) {
            return $collation;
        }

        if (is_object($collation) && isset($collation->misspellingsAndCorrections)) {
            // Only the first entry is of interest
            foreach ($collation->misspellingsAndCorrections as $key => $value) {
                return $value;
            }
        }

        return '';
    }

    /**
     * Extract a human friendly spelling suggestion from a Solr spellcheck collation string.
     *
     * @param String $collation
     * @return String
     */
    protected function getNiceSuggestion($collation = '')
    {
        $collationParts = explode(' ', $collation);

        // Remove advanced query params from the beginning of each collation part.
        // Assign by key rather than by reference so no reference outlives the loop.
        foreach ($collationParts as $key => $part) {
            $collationParts[$key] = ltrim($part, '+');
        }

        return implode(' ', $collationParts);
    }

    /**
     * Extract a query string from a Solr spellcheck collation string.
     * Useful for constructing 'Did you mean?' links, for example:
     * <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
     *
     * @param String $collation
     * @return String
     */
    protected function getSuggestionQueryString($collation = '')
    {
        return str_replace(' ', '+', $this->getNiceSuggestion($collation));
    }

    /**
     * Add a field that should be stored
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     */
    public function addStoredField($field, $forceType = null, $extraOptions = array())
    {
        // 'stored' is merged last so it always wins over the caller's options
        $options = array_merge($extraOptions, array('stored' => 'true'));
        $this->addFulltextField($field, $forceType, $options);
    }

    /**
     * Add a fulltext field with a boosted value
     *
     * @param string $field The field to add
     * @param string $forceType The type to force this field as (required in some cases, when not
     * detectable from metadata)
     * @param array $extraOptions Dependent on search implementation
     * @param float $boost Numeric boosting value (defaults to 2)
     */
    public function addBoostedField($field, $forceType = null, $extraOptions = array(), $boost = 2)
    {
        // 'boost' is merged last so it always wins over the caller's options
        $options = array_merge($extraOptions, array('boost' => $boost));
        $this->addFulltextField($field, $forceType, $options);
    }

    /**
     * Resolve field data via the parent implementation, recording any 'boost'
     * option locally instead of passing it on.
     *
     * @param string $field
     * @param string $forceType
     * @param array $extraOptions A 'boost' key, if present, is removed and applied
     *                            to every field name returned by the parent
     * @return array Field data as returned by the parent implementation
     */
    public function fieldData($field, $forceType = null, $extraOptions = array())
    {
        // Ensure that 'boost' is recorded here without being captured by solr
        $boost = null;
        if (array_key_exists('boost', $extraOptions)) {
            $boost = $extraOptions['boost'];
            unset($extraOptions['boost']);
        }

        $data = parent::fieldData($field, $forceType, $extraOptions);

        // Boost all fields with this name
        if (isset($boost)) {
            foreach ($data as $fieldName => $fieldInfo) {
                $this->boostedFields[$fieldName] = $boost;
            }
        }

        return $data;
    }

    /**
     * Set the default boosting level for a specific field.
     * Will control the default value for qf param (Query Fields), but will not
     * override a query-specific value.
     *
     * Fields must be added before having a field boosting specified
     *
     * @param string $field Full field key (Model_Field)
     * @param float|null $level Numeric boosting value. Set to null to clear boost
     * @throws InvalidArgumentException If no fulltext field with that key exists
     */
    public function setFieldBoosting($field, $level)
    {
        if (!isset($this->fulltextFields[$field])) {
            throw new InvalidArgumentException("No fulltext field $field exists on ".$this->getIndexName());
        }

        if ($level === null) {
            unset($this->boostedFields[$field]);
        } else {
            $this->boostedFields[$field] = $level;
        }
    }

    /**
     * Get all boosted fields
     *
     * @return array
     */
    public function getBoostedFields()
    {
        return $this->boostedFields;
    }

    /**
     * Determine the best default value for the 'qf' parameter
     *
     * @return array|null List of query fields, or null if not specified
     */
    public function getQueryFields()
    {
        // Not necessary to specify this unless boosting
        if (empty($this->boostedFields)) {
            return null;
        }

        $queryFields = array();
        foreach ($this->boostedFields as $fieldName => $boost) {
            $queryFields[] = $fieldName . '^' . $boost;
        }

        // If any fields are queried, we must always include the default field, otherwise it will be excluded
        $df = $this->getDefaultField();
        if ($queryFields && !isset($this->boostedFields[$df])) {
            $queryFields[] = $df;
        }

        return $queryFields;
    }

    /**
     * Gets the default 'stored' value for fields in this index
     *
     * @return string A default value for the 'stored' field option, either 'true' or 'false'
     *                ('true' only when Director::isDev() reports dev mode)
     */
    protected function getStoredDefault()
    {
        return Director::isDev() ? 'true' : 'false';
    }

    /**
     * Build the <field> definition for a single field, including any analyzers
     * registered for it through addAnalyzer().
     *
     * @param String $name
     * @param Array $spec
     * @param Array $typeMap Defaults to self::$filterTypeMap when empty
     * @return String XML
     */
    protected function getFieldDefinition($name, $spec, $typeMap = null)
    {
        if (!$typeMap) {
            $typeMap = self::$filterTypeMap;
        }

        $multiValued = (isset($spec['multi_valued']) && $spec['multi_valued']) ? "true" : '';
        $type = isset($typeMap[$spec['type']]) ? $typeMap[$spec['type']] : $typeMap['*'];

        $analyzerXml = '';
        if (isset($this->analyzerFields[$name])) {
            foreach ($this->analyzerFields[$name] as $analyzerType => $analyzerParams) {
                $analyzerXml .= $this->toXmlTag($analyzerType, $analyzerParams);
            }
        }

        // Field-specific extra options override the generated defaults
        $fieldParams = array_merge(
            array(
                'name' => $name,
                'type' => $type,
                'indexed' => 'true',
                'stored' => $this->getStoredDefault(),
                'multiValued' => $multiValued
            ),
            isset($spec['extra_options']) ? $spec['extra_options'] : array()
        );

        return $this->toXmlTag(
            "field",
            $fieldParams,
            $analyzerXml ? "<analyzer>$analyzerXml</analyzer>" : null
        );
    }

    /**
     * Convert definition to XML tag
     *
     * NOTE(review): attribute values are interpolated as-is, without XML escaping.
     *
     * @param String $tag
     * @param String $attrs Map of attributes
     * @param String $content Inner content
     * @return String XML tag (self-closing when $content is empty)
     */
    protected function toXmlTag($tag, $attrs, $content = null)
    {
        $xml = "<$tag ";

        if ($attrs) {
            $attrStrs = array();
            foreach ($attrs as $attrName => $attrVal) {
                $attrStrs[] = "$attrName='$attrVal'";
            }
            $xml .= $attrStrs ? implode(' ', $attrStrs) : '';
        }

        $xml .= $content ? ">$content</$tag>" : '/>';

        return $xml;
    }

    /**
     * Register an additional copy field definition.
     *
     * @param String $source Composite field name (<class>_<fieldname>)
     * @param String $dest
     * @param array $extraOptions Extra attributes; these override 'source' and 'dest' on key clash
     */
    public function addCopyField($source, $dest, $extraOptions = array())
    {
        if (!isset($this->copyFields[$source])) {
            $this->copyFields[$source] = array();
        }

        $this->copyFields[$source][] = array_merge(
            array('source' => $source, 'dest' => $dest),
            $extraOptions
        );
    }

    /**
     * Build the <copyField> definitions: one per fulltext field targeting the
     * default field, followed by everything registered through addCopyField().
     *
     * @return string XML fragments joined by a newline and a tab
     */
    public function getCopyFieldDefinitions()
    {
        $xml = array();
        $df = $this->getDefaultField();

        foreach ($this->fulltextFields as $name => $field) {
            $xml[] = "<copyField source='{$name}' dest='{$df}' />";
        }

        foreach ($this->copyFields as $source => $fields) {
            foreach ($fields as $fieldAttrs) {
                $xml[] = $this->toXmlTag('copyField', $fieldAttrs);
            }
        }

        return implode("\n\t", $xml);
    }

    /**
     * Add the value(s) of a single field of $object to a Solr document.
     *
     * Nothing is added when $object is neither of the field's origin class nor a
     * subclass of it. Array values are added one by one with addField(); scalar
     * values are written with setField().
     *
     * @param Apache_Solr_Document $doc
     * @param object $object
     * @param array $field Field spec; the 'origin', 'type' and 'name' keys are read here
     */
    protected function _addField($doc, $object, $field)
    {
        $class = get_class($object);
        if ($class != $field['origin'] && !is_subclass_of($class, $field['origin'])) {
            return;
        }

        $value = $this->_getFieldValue($object, $field);

        $type = isset(self::$filterTypeMap[$field['type']])
            ? self::$filterTypeMap[$field['type']]
            : self::$filterTypeMap['*'];
        $isDate = ($type == 'tdate');
        $isNumber = in_array($type, array('tint', 'tfloat', 'tdouble'), true);

        // Treat a scalar as a one-element list so both cases share the same normalisation
        $isMulti = is_array($value);
        $candidates = $isMulti ? $value : array($value);

        foreach ($candidates as $item) {
            /* Solr requires dates in the form 1995-12-31T23:59:59Z */
            if ($isDate) {
                if (!$item) {
                    continue;
                }
                // strtotime() returns false for unparseable input; feeding that to
                // gmdate() would silently index the Unix epoch, so skip the value instead
                $timestamp = strtotime($item);
                if ($timestamp === false) {
                    continue;
                }
                $item = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
            }

            /* Solr requires numbers to be valid if presented, not just empty */
            if ($isNumber && !is_numeric($item)) {
                continue;
            }

            if ($isMulti) {
                $doc->addField($field['name'], $item);
            } else {
                $doc->setField($field['name'], $item);
            }
        }
    }

    /**
     * Build a Solr document for $object as a member of $base and send it to the service.
     *
     * @param object $object
     * @param string $base Base class the object is indexed under
     * @param array $options Class options; the 'include_children' key is read here
     * @return Apache_Solr_Document|false The document, or false if the service threw
     *                                    (the exception is logged as a warning)
     */
    protected function _addAs($object, $base, $options)
    {
        $includeSubs = $options['include_children'];

        $doc = new Apache_Solr_Document();

        // Always present fields
        $doc->setField('_documentid', $this->getDocumentID($object, $base, $includeSubs));
        $doc->setField('ID', $object->ID);
        $doc->setField('ClassName', $object->ClassName);

        foreach (SearchIntrospection::hierarchy(get_class($object), false) as $class) {
            $doc->addField('ClassHierarchy', $class);
        }

        // Add the user-specified fields
        foreach ($this->getFieldsIterator() as $name => $field) {
            if ($field['base'] == $base) {
                $this->_addField($doc, $object, $field);
            }
        }

        try {
            $this->getService()->addDocument($doc);
        } catch (Exception $e) {
            SS_Log::log($e, SS_Log::WARN);
            return false;
        }

        return $doc;
    }

    /**
     * Add $object to the index once for every configured class it matches.
     *
     * @param object $object
     * @return array One entry per matching class: the document, or false on failure
     */
    public function add($object)
    {
        $class = get_class($object);
        $docs = array();

        foreach ($this->getClasses() as $searchclass => $options) {
            if ($searchclass == $class || ($options['include_children'] && is_subclass_of($class, $searchclass))) {
                $base = ClassInfo::baseDataClass($searchclass);
                $docs[] = $this->_addAs($object, $base, $options);
            }
        }

        return $docs;
    }

    /**
     * Check whether objects of $class match any class configured on this index.
     *
     * @param string $class
     * @return bool
     */
    public function canAdd($class)
    {
        foreach ($this->classes as $searchclass => $options) {
            if ($searchclass == $class || ($options['include_children'] && is_subclass_of($class, $searchclass))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Delete the document identified by base class, ID and state.
     *
     * @param string $base
     * @param int $id
     * @param mixed $state
     * @return false|null false if the service threw (the exception is logged as
     *                    a warning); no value otherwise
     */
    public function delete($base, $id, $state)
    {
        $documentID = $this->getDocumentIDForState($base, $id, $state);

        try {
            $this->getService()->deleteById($documentID);
        } catch (Exception $e) {
            SS_Log::log($e, SS_Log::WARN);
            return false;
        }
    }

    /**
     * Commit pending changes on the service.
     *
     * @return false|null false if the service threw (the exception is logged as
     *                    a warning); no value otherwise
     */
    public function commit()
    {
        try {
            $this->getService()->commit(false, false, false);
        } catch (Exception $e) {
            SS_Log::log($e, SS_Log::WARN);
            return false;
        }
    }

    /**
     * @param SearchQuery $query
     * @param integer $offset -1 to use the query's own start value
     * @param integer $limit -1 to use the query's own limit, falling back to
     *                       SearchQuery::$default_page_size
     * @param array $params Extra request parameters passed through to Solr
     * @return ArrayData Map with the following keys:
     *   - 'Matches': PaginatedList of the matched object instances
     *   - 'Spellcheck': raw spellcheck data (only if present in the response)
     *   - 'Suggestion', 'SuggestionNice', 'SuggestionQueryString':
     *     only if the response contains a spellcheck collation
     */
    public function search(SearchQuery $query, $offset = -1, $limit = -1, $params = array())
    {
        $service = $this->getService();

        $searchClass = count($query->classes) == 1 ? $query->classes[0]['class'] : null;
        SearchVariant::with($searchClass)
            ->call('alterQuery', $query, $this);

        $q = array(); // Query
        $fq = array(); // Filter query
        $hlq = array(); // Highlight query

        // Build the search itself
        foreach ($query->search as $search) {
            $text = $search['text'];
            // Split into double-quoted phrases and whitespace-separated terms
            preg_match_all('/"[^"]*"|\S+/', $text, $parts);

            $fuzzy = $search['fuzzy'] ? '~' : '';

            // The field list is the same for every part of this search clause
            $fields = (isset($search['fields'])) ? $search['fields'] : array();
            if (isset($search['boost'])) {
                $fields = array_merge($fields, array_keys($search['boost']));
            }

            foreach ($parts[0] as $part) {
                if ($fields) {
                    $searchq = array();
                    foreach ($fields as $field) {
                        $boost = (isset($search['boost'][$field])) ? '^' . $search['boost'][$field] : '';
                        $searchq[] = "{$field}:".$part.$fuzzy.$boost;
                    }
                    $q[] = '+('.implode(' OR ', $searchq).')';
                } else {
                    $q[] = '+'.$part.$fuzzy;
                }
                $hlq[] = $part;
            }
        }

        // If using boosting, set the clean term separately for highlighting.
        // See https://issues.apache.org/jira/browse/SOLR-2632
        if (array_key_exists('hl', $params) && !array_key_exists('hl.q', $params)) {
            $params['hl.q'] = implode(' ', $hlq);
        }

        // Filter by class if requested
        $classq = array();
        foreach ($query->classes as $class) {
            if (!empty($class['includeSubclasses'])) {
                $classq[] = 'ClassHierarchy:'.$class['class'];
            } else {
                $classq[] = 'ClassName:'.$class['class'];
            }
        }
        if ($classq) {
            $fq[] = '+('.implode(' ', $classq).')';
        }

        // Filter by filters
        // NOTE(review): filter values are placed into the query without escaping
        foreach ($query->require as $field => $values) {
            $requireq = array();

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    $requireq[] = "(*:* -{$field}:[* TO *])";
                } elseif ($value === SearchQuery::$present) {
                    $requireq[] = "{$field}:[* TO *]";
                } elseif ($value instanceof SearchQuery_Range) {
                    $start = $value->start;
                    if ($start === null) {
                        $start = '*';
                    }
                    $end = $value->end;
                    if ($end === null) {
                        $end = '*';
                    }
                    $requireq[] = "$field:[$start TO $end]";
                } else {
                    $requireq[] = $field.':"'.$value.'"';
                }
            }

            $fq[] = '+('.implode(' ', $requireq).')';
        }

        foreach ($query->exclude as $field => $values) {
            $excludeq = array();
            $missing = false;

            foreach ($values as $value) {
                if ($value === SearchQuery::$missing) {
                    $missing = true;
                } elseif ($value === SearchQuery::$present) {
                    $excludeq[] = "{$field}:[* TO *]";
                } elseif ($value instanceof SearchQuery_Range) {
                    $start = $value->start;
                    if ($start === null) {
                        $start = '*';
                    }
                    $end = $value->end;
                    if ($end === null) {
                        $end = '*';
                    }
                    $excludeq[] = "$field:[$start TO $end]";
                } else {
                    $excludeq[] = $field.':"'.$value.'"';
                }
            }

            // Excluding "missing" means requiring the field to be present.
            // Only emit the negated group when it has content, so that an
            // empty "-()" clause never reaches Solr.
            $clauses = array();
            if ($missing) {
                $clauses[] = "+{$field}:[* TO *]";
            }
            if ($excludeq) {
                $clauses[] = '-('.implode(' ', $excludeq).')';
            }
            if ($clauses) {
                $fq[] = implode(' ', $clauses);
            }
        }

        // Prepare query fields unless specified explicitly
        if (isset($params['qf'])) {
            $qf = $params['qf'];
        } else {
            $qf = $this->getQueryFields();
        }
        if (is_array($qf)) {
            $qf = implode(' ', $qf);
        }
        if ($qf) {
            $params['qf'] = $qf;
        }

        // Expose the generated query as response headers outside of live mode
        if (!headers_sent() && !Director::isLive()) {
            if ($q) {
                header('X-Query: '.implode(' ', $q));
            }
            if ($fq) {
                header('X-Filters: "'.implode('", "', $fq).'"');
            }
            if ($qf) {
                header('X-QueryFields: '.$qf);
            }
        }

        if ($offset == -1) {
            $offset = $query->start;
        }
        if ($limit == -1) {
            $limit = $query->limit;
        }
        if ($limit == -1) {
            $limit = SearchQuery::$default_page_size;
        }

        $params = array_merge($params, array('fq' => implode(' ', $fq)));

        $res = $service->search(
            $q ? implode(' ', $q) : '*:*',
            $offset,
            $limit,
            $params,
            Apache_Solr_Service::METHOD_POST
        );

        $results = new ArrayList();
        $httpStatus = $res->getHttpStatus();
        if ($httpStatus >= 200 && $httpStatus < 300) {
            foreach ($res->response->docs as $doc) {
                $result = DataObject::get_by_id($doc->ClassName, $doc->ID);
                if (!$result) {
                    continue;
                }

                $results->push($result);

                // Add highlighting (optional)
                $docId = $doc->_documentid;
                if ($res->highlighting && $res->highlighting->$docId) {
                    // TODO Create decorator class for search results rather than adding arbitrary object properties
                    // TODO Allow specifying highlighted field, and lazy loading
                    // in case the search API needs another query (similar to SphinxSearchable->buildExcerpt()).
                    $combinedHighlights = array();
                    foreach ($res->highlighting->$docId as $field => $highlights) {
                        $combinedHighlights = array_merge($combinedHighlights, $highlights);
                    }

                    // Remove the U+FFFD replacement character, both entity-encoded and as raw
                    // UTF-8 bytes. It signifies non-displayable characters, and shows up as an
                    // encoding error in browsers.
                    $result->Excerpt = DBField::create_field(
                        'HTMLText',
                        str_replace(
                            array('&#65533;', "\xEF\xBF\xBD"),
                            '',
                            implode(' ... ', $combinedHighlights)
                        )
                    );
                }
            }
            $numFound = $res->response->numFound;
        } else {
            $numFound = 0;
        }

        $ret = array();
        $ret['Matches'] = new PaginatedList($results);
        $ret['Matches']->setLimitItems(false);
        // Tell PaginatedList how many results there are
        $ret['Matches']->setTotalItems($numFound);
        // Results for current page start at $offset
        $ret['Matches']->setPageStart($offset);
        // Results per page
        $ret['Matches']->setPageLength($limit);

        // Include spellcheck and suggestion data. Requires spellcheck=true in $params
        if (isset($res->spellcheck)) {
            // Expose all spellcheck data, for custom handling.
            $ret['Spellcheck'] = $res->spellcheck;

            // Suggestions. Requires spellcheck.collate=true in $params
            if (isset($res->spellcheck->suggestions->collation)) {
                // Extract string suggestion
                $suggestion = $this->getCollatedSuggestion($res->spellcheck->suggestions->collation);

                // The collation, including advanced query params (e.g. +), suitable for making another query programmatically.
                $ret['Suggestion'] = $suggestion;

                // A human friendly version of the suggestion, suitable for 'Did you mean $SuggestionNice?' display.
                $ret['SuggestionNice'] = $this->getNiceSuggestion($suggestion);

                // A string suitable for appending to an href as a query string.
                // For example <a href="http://example.com/search?q=$SuggestionQueryString">$SuggestionNice</a>
                $ret['SuggestionQueryString'] = $this->getSuggestionQueryString($suggestion);
            }
        }

        return new ArrayData($ret);
    }

    /**
     * Get the service for this index, creating it through Solr::service() on first use.
     *
     * @return SolrService
     */
    public function getService()
    {
        if (!$this->service) {
            $this->service = Solr::service(get_class($this));
        }
        return $this->service;
    }

    /**
     * Replace the service used by this index.
     *
     * @param SolrService $service
     * @return $this
     */
    public function setService(SolrService $service)
    {
        $this->service = $service;
        return $this;
    }

    /**
     * Upload config for this index to the given store
     *
     * @param SolrConfigStore $store
     */
    public function uploadConfig($store)
    {
        // Upload the config files for this index
        $store->uploadString(
            $this->getIndexName(),
            'schema.xml',
            (string)$this->generateSchema()
        );

        // Upload additional files.
        // glob() returns false on error, which must not be passed to foreach.
        $extras = glob($this->getExtrasPath().'/*');
        if ($extras) {
            foreach ($extras as $file) {
                if (is_file($file)) {
                    $store->uploadFile($this->getIndexName(), $file);
                }
            }
        }
    }
}