silverstripe-docsviewer/thirdparty/Zend/Search/Lucene/Search/QueryToken.php

226 lines
6.8 KiB
PHP
Executable File

<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: QueryToken.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryToken
{
    // A single lexeme of a search query string. The constructor receives a
    // coarse token category (TC_*) together with the token text and resolves
    // them to a precise token type (TT_*).

    /**
     * Token types.
     */
    const TT_WORD             = 0;  // Word
    const TT_PHRASE           = 1;  // Phrase (one or several quoted words)
    const TT_FIELD            = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR  = 3;  // ':'
    const TT_REQUIRED         = 4;  // '+'
    const TT_PROHIBITED       = 5;  // '-'
    const TT_FUZZY_PROX_MARK  = 6;  // '~'
    const TT_BOOSTING_MARK    = 7;  // '^'
    const TT_RANGE_INCL_START = 8;  // '['
    const TT_RANGE_INCL_END   = 9;  // ']'
    const TT_RANGE_EXCL_START = 10; // '{'
    const TT_RANGE_EXCL_END   = 11; // '}'
    const TT_SUBQUERY_START   = 12; // '('
    const TT_SUBQUERY_END     = 13; // ')'
    const TT_AND_LEXEME       = 14; // 'AND' or 'and' (also '&&')
    const TT_OR_LEXEME        = 15; // 'OR' or 'or' (also '||')
    const TT_NOT_LEXEME       = 16; // 'NOT' or 'not' (also '!')
    const TT_TO_LEXEME        = 17; // 'TO' or 'to'
    const TT_NUMBER           = 18; // Number, like: 10, 0.8, .64, ....

    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization.
     *
     * @return array List of every TT_* constant, in declaration order
     */
    public static function getTypes()
    {
        return array(
            self::TT_WORD,
            self::TT_PHRASE,
            self::TT_FIELD,
            self::TT_FIELD_INDICATOR,
            self::TT_REQUIRED,
            self::TT_PROHIBITED,
            self::TT_FUZZY_PROX_MARK,
            self::TT_BOOSTING_MARK,
            self::TT_RANGE_INCL_START,
            self::TT_RANGE_INCL_END,
            self::TT_RANGE_EXCL_START,
            self::TT_RANGE_EXCL_END,
            self::TT_SUBQUERY_START,
            self::TT_SUBQUERY_END,
            self::TT_AND_LEXEME,
            self::TT_OR_LEXEME,
            self::TT_NOT_LEXEME,
            self::TT_TO_LEXEME,
            self::TT_NUMBER
        );
    }

    /**
     * Token categories.
     */
    const TC_WORD           = 0; // Word
    const TC_PHRASE         = 1; // Phrase (one or several quoted words)
    const TC_NUMBER         = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3; // : + - ( ) [ ] { } ! || && ~ ^

    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (counted from 1).
     *
     * @var integer
     */
    public $position;

    /**
     * Builds a token from its category, text and position, resolving the
     * category/text pair to a concrete TT_* type.
     *
     * Words are matched case-insensitively against the operator keywords
     * 'and', 'or', 'not' and 'to'; any other word becomes TT_WORD.
     *
     * @param integer $tokenCategory One of the TC_* constants
     * @param string  $tokenText     Token text
     * @param integer $position      Zero-based position; stored as $position + 1
     * @throws Zend_Search_Lucene_Exception If the category is unknown, or the text
     *                                      of a syntax element is not a recognized lexeme
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Lowercase once so the keyword check is case-insensitive.
                switch (strtolower($tokenText)) {
                    case 'and':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case 'or':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    case 'not':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case 'to':
                        $this->type = self::TT_TO_LEXEME;
                        break;

                    default:
                        $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                switch ($tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;

                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;

                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;

                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;

                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;

                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;

                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;

                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;

                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;

                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;

                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;

                    // Symbolic spellings of the boolean operators share the
                    // types of their keyword forms.
                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;

                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;

                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;

                    default:
                        include_once 'Zend/Search/Lucene/Exception.php';
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            default:
                include_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}