Merge branch 'docs' of github.com:silverstripe-big-o/silverstripe-fulltextsearch into silverstripe-big-o-docs

This commit is contained in:
Sean Harvey 2013-02-13 10:43:05 +13:00
commit 7a9a698c2d
2 changed files with 122 additions and 17 deletions

View File

@ -3,7 +3,7 @@
/** /**
* Represents a search query * Represents a search query
* *
* API very much still in flux. Generally, calling with multiple arguments = OR, calling multiple times = AND. * API very much still in flux.
*/ */
class SearchQuery extends ViewableData { class SearchQuery extends ViewableData {
@ -32,9 +32,9 @@ class SearchQuery extends ViewableData {
} }
/** /**
* @param [type] $text [description] * @param String $text Search terms. Exact format (grouping, boolean expressions, etc.) depends on the search implementation.
* @param [type] $fields [description] * @param array $fields Limits the search to specific fields (using composite field names)
* @param array $boost Map of field names to float values. The higher the value, * @param array $boost Map of composite field names to float values. The higher the value,
* the more important the field gets for relevancy. * the more important the field gets for relevancy.
*/ */
function search($text, $fields = null, $boost = array()) { function search($text, $fields = null, $boost = array()) {
@ -42,10 +42,13 @@ class SearchQuery extends ViewableData {
} }
/** /**
* @param [type] $text [description] * Similar to {@link search()}, but uses stemming and other similarity algorithms
* @param [type] $fields [description] * to find the searched terms. For example, a term "fishing" would also likely find results
* @param array $boost Map of field names to float values. The higher the value, * containing "fish" or "fisher". Depends on search implementation.
* the more important the field gets for relevancy. *
* @param String $text See {@link search()}
* @param array $fields See {@link search()}
* @param array $boost See {@link search()}
*/ */
function fuzzysearch($text, $fields = null, $boost = array()) { function fuzzysearch($text, $fields = null, $boost = array()) {
$this->search[] = array('text' => $text, 'fields' => $fields ? (array)$fields : null, 'boost' => $boost, 'fuzzy' => true); $this->search[] = array('text' => $text, 'fields' => $fields ? (array)$fields : null, 'boost' => $boost, 'fuzzy' => true);
@ -55,12 +58,25 @@ class SearchQuery extends ViewableData {
$this->classes[] = array('class' => $class, 'includeSubclasses' => $includeSubclasses); $this->classes[] = array('class' => $class, 'includeSubclasses' => $includeSubclasses);
} }
/**
* Similar to {@link search()}, but typically used to further narrow down
* based on other facets which don't influence the field relevancy.
*
* @param String $field Composite name of the field
* @param Mixed $values Scalar value, array of values, or an instance of SearchQuery_Range
*/
function filter($field, $values) { function filter($field, $values) {
$requires = isset($this->require[$field]) ? $this->require[$field] : array(); $requires = isset($this->require[$field]) ? $this->require[$field] : array();
$values = is_array($values) ? $values : array($values); $values = is_array($values) ? $values : array($values);
$this->require[$field] = array_merge($requires, $values); $this->require[$field] = array_merge($requires, $values);
} }
/**
* Excludes results which match these criteria, inverse of {@link filter()}.
*
* @param String $field
* @param mixed $values
*/
function exclude($field, $values) { function exclude($field, $values) {
$excludes = isset($this->exclude[$field]) ? $this->exclude[$field] : array(); $excludes = isset($this->exclude[$field]) ? $this->exclude[$field] : array();
$values = is_array($values) ? $values : array($values); $values = is_array($values) ? $values : array($values);

View File

@ -18,11 +18,9 @@ This is a module aimed at adding support for standalone fulltext search engines
It contains several layers: It contains several layers:
* A fulltext API, ignoring the actual provision of fulltext searching * A fulltext API, ignoring the actual provision of fulltext searching
* A connector API, providing common code to allow connecting a fulltext searching engine to the fulltext API, and
* A connector API, providing common code to allow connecting a fulltext searching engine to the fulltext API, and * Some connectors for common fulltext searching engines.
* Some connectors for common fulltext searching engines.
## Reasoning ## Reasoning
@ -38,7 +36,7 @@ This module instead provides the ability to define those indexes and queries in
between the SilverStripe object model and the connector-specific fulltext engine index model. This module then interrogates model metadata between the SilverStripe object model and the connector-specific fulltext engine index model. This module then interrogates model metadata
to build the specific index definition. to build the specific index definition.
It also hooks into Sapphire in order to update the indexes when the models change and connectors then convert those index and query definitions It also hooks into SilverStripe framework in order to update the indexes when the models change and connectors then convert those index and query definitions
into fulltext engine specific code. into fulltext engine specific code.
The intent of this module is not to make changing fulltext search engines seamless. Where possible this module provides The intent of this module is not to make changing fulltext search engines seamless. Where possible this module provides
@ -57,13 +55,17 @@ Basic usage is a four step process:
class MyIndex extends SolrIndex { class MyIndex extends SolrIndex {
function init() { function init() {
$this->addClass('Page'); $this->addClass('Page');
$this->addFulltextField('DocumentContents'); $this->addFulltextField('Title');
$this->addFulltextField('Content');
} }
} }
You can also skip listing all searchable fields, and have the index
figure it out automatically via `addAllFulltextFields()`.
2). Add something to the index (Note: You can also just update an existing document in the CMS, but adding _existing_ objects to the index is connector specific) 2). Add something to the index (Note: You can also just update an existing document in the CMS, but adding _existing_ objects to the index is connector specific)
$page = new Page(array('Contents' => 'Help me. My house is on fire. This is less than optimal.')); $page = new Page(array('Content' => 'Help me. My house is on fire. This is less than optimal.'));
$page->write(); $page->write();
Note: There's usually a connector-specific "reindex" task for this. Note: There's usually a connector-specific "reindex" task for this.
@ -79,6 +81,89 @@ Note: There's usually a connector-specific "reindex" task for this.
Note that for most connectors, changes won't be searchable until _after_ the request that triggered the change. Note that for most connectors, changes won't be searchable until _after_ the request that triggered the change.
## Searching Specific Fields
By default, the index searches through all indexed fields.
This can be limited by arguments to the `search()` call.
$query = new SearchQuery();
$query->search('My house is on fire', array('Page_Title'));
// No results, since we're searching in title rather than page content
$results = singleton('MyIndex')->search($query);
## Searching Value Ranges
Most values can be expressed as ranges, most commonly dates or numbers.
To search for a range of values rather than an exact match,
use the `SearchQuery_Range` class. The range can include bounds on both sides,
or stay open ended by simply leaving the argument blank.
$query = new SearchQuery();
$query->search('My house is on fire');
// Only include documents edited in 2011 or earlier
$query->filter('Page_LastEdited', new SearchQuery_Range(null, '2011-12-31T23:59:59Z'));
$results = singleton('MyIndex')->search($query);
Note: At the moment, the date format is specific to the search implementation.
## Searching Empty or Existing Values
Since there's a type conversion between the SilverStripe database, object properties
and the search index persistence, it's often not clear which condition is searched for.
Should it equal an empty string, or only match if the field wasn't indexed at all?
The `SearchQuery` API has the concept of a "missing" and "present" field value for this:
$query = new SearchQuery();
$query->search('My house is on fire');
// Needs a value, although it can be false
$query->filter('Page_ShowInMenus', SearchQuery::$present);
$results = singleton('MyIndex')->search($query);
## Indexing Multiple Classes
An index is a denormalized view of your data, so it can hold data from more than one model.
As you can only search one index at a time, all searchable classes need to be included.
// File: mysite/code/MyIndex.php:
<?php
class MyIndex extends SolrIndex {
function init() {
$this->addClass('Page');
$this->addClass('Member');
$this->addFulltextField('Content'); // only applies to Page class
$this->addFulltextField('FirstName'); // only applies to Member class
}
}
## Indexing Relationships
TODO
## Weighting/Boosting Fields
Results aren't all created equal. Matches in some fields are more important
than others, for example terms in a page title rather than its content
might be considered more relevant to the user.
To account for this, a "weighting" (or "boosting") factor can be applied to each
searched field. The default is 1.0, anything below that will decrease the relevance,
anything above increases it.
Example:
$query = new SearchQuery();
$query->search(
'My house is on fire',
null,
array(
'Page_Title' => 1.5,
'Page_Content' => 1.0
)
);
$results = singleton('MyIndex')->search($query);
## Filtering
## Connectors ## Connectors
### Solr ### Solr
@ -108,4 +193,8 @@ Before constructing your `SearchQuery`, conditionally switch to the "live" stage
if(!Permission::check('CMS_ACCESS_CMSMain')) Versioned::reading_stage('Live'); if(!Permission::check('CMS_ACCESS_CMSMain')) Versioned::reading_stage('Live');
$query = new SearchQuery(); $query = new SearchQuery();
// ... // ...
### How do I write nested/complex filters?
TODO