mirror of
https://github.com/silverstripe/silverstripe-docsviewer
synced 2024-10-22 09:05:56 +00:00
226 lines
6.8 KiB
PHP
Executable File
226 lines
6.8 KiB
PHP
Executable File
<?php
|
|
/**
|
|
* Zend Framework
|
|
*
|
|
* LICENSE
|
|
*
|
|
* This source file is subject to the new BSD license that is bundled
|
|
* with this package in the file LICENSE.txt.
|
|
* It is also available through the world-wide-web at this URL:
|
|
* http://framework.zend.com/license/new-bsd
|
|
* If you did not receive a copy of the license and are unable to
|
|
* obtain it through the world-wide-web, please send an email
|
|
* to license@zend.com so we can send you a copy immediately.
|
|
*
|
|
* @category Zend
|
|
* @package Zend_Search_Lucene
|
|
* @subpackage Search
|
|
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
|
* @version $Id: QueryToken.php 20096 2010-01-06 02:05:09Z bkarwin $
|
|
*/
|
|
|
|
/**
|
|
* @category Zend
|
|
* @package Zend_Search_Lucene
|
|
* @subpackage Search
|
|
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
|
*/
|
|
class Zend_Search_Lucene_Search_QueryToken
|
|
{
|
|
/**
|
|
* Token types.
|
|
*/
|
|
const TT_WORD = 0; // Word
|
|
const TT_PHRASE = 1; // Phrase (one or several quoted words)
|
|
const TT_FIELD = 2; // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
|
|
const TT_FIELD_INDICATOR = 3; // ':'
|
|
const TT_REQUIRED = 4; // '+'
|
|
const TT_PROHIBITED = 5; // '-'
|
|
const TT_FUZZY_PROX_MARK = 6; // '~'
|
|
const TT_BOOSTING_MARK = 7; // '^'
|
|
const TT_RANGE_INCL_START = 8; // '['
|
|
const TT_RANGE_INCL_END = 9; // ']'
|
|
const TT_RANGE_EXCL_START = 10; // '{'
|
|
const TT_RANGE_EXCL_END = 11; // '}'
|
|
const TT_SUBQUERY_START = 12; // '('
|
|
const TT_SUBQUERY_END = 13; // ')'
|
|
const TT_AND_LEXEME = 14; // 'AND' or 'and'
|
|
const TT_OR_LEXEME = 15; // 'OR' or 'or'
|
|
const TT_NOT_LEXEME = 16; // 'NOT' or 'not'
|
|
const TT_TO_LEXEME = 17; // 'TO' or 'to'
|
|
const TT_NUMBER = 18; // Number, like: 10, 0.8, .64, ....
|
|
|
|
|
|
/**
|
|
* Returns all possible lexeme types.
|
|
* It's used for syntax analyzer state machine initialization
|
|
*
|
|
* @return array
|
|
*/
|
|
public static function getTypes()
|
|
{
|
|
return array( self::TT_WORD,
|
|
self::TT_PHRASE,
|
|
self::TT_FIELD,
|
|
self::TT_FIELD_INDICATOR,
|
|
self::TT_REQUIRED,
|
|
self::TT_PROHIBITED,
|
|
self::TT_FUZZY_PROX_MARK,
|
|
self::TT_BOOSTING_MARK,
|
|
self::TT_RANGE_INCL_START,
|
|
self::TT_RANGE_INCL_END,
|
|
self::TT_RANGE_EXCL_START,
|
|
self::TT_RANGE_EXCL_END,
|
|
self::TT_SUBQUERY_START,
|
|
self::TT_SUBQUERY_END,
|
|
self::TT_AND_LEXEME,
|
|
self::TT_OR_LEXEME,
|
|
self::TT_NOT_LEXEME,
|
|
self::TT_TO_LEXEME,
|
|
self::TT_NUMBER
|
|
);
|
|
}
|
|
|
|
|
|
/**
|
|
* TokenCategories
|
|
*/
|
|
const TC_WORD = 0; // Word
|
|
const TC_PHRASE = 1; // Phrase (one or several quoted words)
|
|
const TC_NUMBER = 2; // Nubers, which are used with syntax elements. Ex. roam~0.8
|
|
const TC_SYNTAX_ELEMENT = 3; // + - ( ) [ ] { } ! || && ~ ^
|
|
|
|
|
|
/**
|
|
* Token type.
|
|
*
|
|
* @var integer
|
|
*/
|
|
public $type;
|
|
|
|
/**
|
|
* Token text.
|
|
*
|
|
* @var integer
|
|
*/
|
|
public $text;
|
|
|
|
/**
|
|
* Token position within query.
|
|
*
|
|
* @var integer
|
|
*/
|
|
public $position;
|
|
|
|
|
|
/**
|
|
* IndexReader constructor needs token type and token text as a parameters.
|
|
*
|
|
* @param integer $tokenCategory
|
|
* @param string $tokText
|
|
* @param integer $position
|
|
*/
|
|
public function __construct($tokenCategory, $tokenText, $position)
|
|
{
|
|
$this->text = $tokenText;
|
|
$this->position = $position + 1; // Start from 1
|
|
|
|
switch ($tokenCategory) {
|
|
case self::TC_WORD:
|
|
if (strtolower($tokenText) == 'and') {
|
|
$this->type = self::TT_AND_LEXEME;
|
|
} else if (strtolower($tokenText) == 'or') {
|
|
$this->type = self::TT_OR_LEXEME;
|
|
} else if (strtolower($tokenText) == 'not') {
|
|
$this->type = self::TT_NOT_LEXEME;
|
|
} else if (strtolower($tokenText) == 'to') {
|
|
$this->type = self::TT_TO_LEXEME;
|
|
} else {
|
|
$this->type = self::TT_WORD;
|
|
}
|
|
break;
|
|
|
|
case self::TC_PHRASE:
|
|
$this->type = self::TT_PHRASE;
|
|
break;
|
|
|
|
case self::TC_NUMBER:
|
|
$this->type = self::TT_NUMBER;
|
|
break;
|
|
|
|
case self::TC_SYNTAX_ELEMENT:
|
|
switch ($tokenText) {
|
|
case ':':
|
|
$this->type = self::TT_FIELD_INDICATOR;
|
|
break;
|
|
|
|
case '+':
|
|
$this->type = self::TT_REQUIRED;
|
|
break;
|
|
|
|
case '-':
|
|
$this->type = self::TT_PROHIBITED;
|
|
break;
|
|
|
|
case '~':
|
|
$this->type = self::TT_FUZZY_PROX_MARK;
|
|
break;
|
|
|
|
case '^':
|
|
$this->type = self::TT_BOOSTING_MARK;
|
|
break;
|
|
|
|
case '[':
|
|
$this->type = self::TT_RANGE_INCL_START;
|
|
break;
|
|
|
|
case ']':
|
|
$this->type = self::TT_RANGE_INCL_END;
|
|
break;
|
|
|
|
case '{':
|
|
$this->type = self::TT_RANGE_EXCL_START;
|
|
break;
|
|
|
|
case '}':
|
|
$this->type = self::TT_RANGE_EXCL_END;
|
|
break;
|
|
|
|
case '(':
|
|
$this->type = self::TT_SUBQUERY_START;
|
|
break;
|
|
|
|
case ')':
|
|
$this->type = self::TT_SUBQUERY_END;
|
|
break;
|
|
|
|
case '!':
|
|
$this->type = self::TT_NOT_LEXEME;
|
|
break;
|
|
|
|
case '&&':
|
|
$this->type = self::TT_AND_LEXEME;
|
|
break;
|
|
|
|
case '||':
|
|
$this->type = self::TT_OR_LEXEME;
|
|
break;
|
|
|
|
default:
|
|
include_once 'Zend/Search/Lucene/Exception.php';
|
|
throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
|
|
}
|
|
break;
|
|
|
|
case self::TC_NUMBER:
|
|
$this->type = self::TT_NUMBER;
|
|
|
|
default:
|
|
include_once 'Zend/Search/Lucene/Exception.php';
|
|
throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
|
|
}
|
|
}
|
|
}
|