mirror of
https://github.com/silverstripe/silverstripe-docsviewer
synced 2024-10-22 11:05:56 +02:00
ENHANCEMENT: included zend/search/lucene with the module to avoid including the entire lucene module. ENHANCEMENT: added hourly task as well as buildtask for dealing with cron updates. MINOR: updated documentation
This commit is contained in:
parent
59c9762dc0
commit
221cd700b6
17
README.md
17
README.md
@ -1,17 +1,20 @@
|
||||
# Documentation Viewer Module
|
||||
|
||||
## Maintainer Content
|
||||
* Will Rossiter (Nickname: willr, wrossiter) <will@silverstripe.com>
|
||||
## Maintainer Contact
|
||||
|
||||
* Will Rossiter (Nickname: willr, wrossiter)
|
||||
<will@silverstripe.com>
|
||||
|
||||
## Requirements
|
||||
* Tested on 2.4 and trunk
|
||||
|
||||
* SilverStripe 2.4
|
||||
|
||||
## Summary
|
||||
Read nested documentation files from the /docs/ folder in modules. To read documentation go to yoursite.com/dev/docs/.
|
||||
|
||||
It is likely this will be integrated into the core in future versions once it is polished.
|
||||
Reads markdown documentation files from the /docs/ folder in your modules. To read documentation go to yoursite.com/dev/docs/.
|
||||
|
||||
For more documentation on how to use the module please read /docs/Writing-Documentation.md (or via this in /dev/docs/sapphiredocs/Writing-Documentation)
|
||||
For more documentation on how to use the module please read /docs/Writing-Documentation.md
|
||||
(or view it at /dev/docs/sapphiredocs/Writing-Documentation in your web browser)
|
||||
|
||||
## Syntax Highlighting ##
|
||||
|
||||
@ -55,4 +58,4 @@ To include the syntax highlighter source, add the following to your `Documentati
|
||||
Requirements::css('sapphiredocs/thirdparty/syntaxhighlighter/styles/shThemeRDark.css');
|
||||
|
||||
You can overload the `DocumentationViewer` class and add a custom route through `Director::addRule()`
|
||||
if you prefer not to modify the module file.
|
||||
if you prefer not to modify the module file.
|
||||
|
@ -1,5 +1,13 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Documentation Configuration
|
||||
*
|
||||
* Please override any of these options in your own project's _config.php file.
|
||||
* For more information and documentation see sapphiredocs/docs/en
|
||||
*/
|
||||
|
||||
// default location for documentation
|
||||
Director::addRules(100, array(
|
||||
'dev/docs' => 'DocumentationViewer'
|
||||
));
|
@ -67,13 +67,10 @@ class DocumentationSearch {
|
||||
* Enable searching documentation
|
||||
*/
|
||||
public static function enable() {
|
||||
if(!class_exists('ZendSearchLuceneSearchable')) {
|
||||
return user_error('DocumentationSearch requires the ZendSearchLucene library', E_ERROR);
|
||||
}
|
||||
|
||||
self::$enabled = true;
|
||||
|
||||
ZendSearchLuceneSearchable::enable(array());
|
||||
|
||||
// include the zend search functionality
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . dirname(dirname(__FILE__)) . '/thirdparty/');
|
||||
}
|
||||
|
||||
/**
|
||||
@ -103,12 +100,19 @@ class DocumentationSearch {
|
||||
* Rebuilds the index if it is out of date
|
||||
*/
|
||||
public function performSearch($query) {
|
||||
$index = Zend_Search_Lucene::open(self::get_index_location());
|
||||
try {
|
||||
$index = Zend_Search_Lucene::open(self::get_index_location());
|
||||
|
||||
Zend_Search_Lucene::setResultSetLimit(200);
|
||||
Zend_Search_Lucene::setResultSetLimit(200);
|
||||
|
||||
$this->results = $index->find($query);
|
||||
$this->totalResults = $index->numDocs();
|
||||
$this->results = $index->find($query);
|
||||
$this->totalResults = $index->numDocs();
|
||||
}
|
||||
catch(Zend_Search_Lucene_Exception $e) {
|
||||
// the reindexing task has not been run
|
||||
user_error('DocumentationSearch::performSearch() could not perform search as index does not exist.
|
||||
Please run /dev/tasks/RebuildLuceneDocsIndex', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,37 +1,48 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Rebuilds the search indexes for the documentation pages.
|
||||
*
|
||||
* For the hourly cron rebuild use RebuildLuceneDocusIndex_Hourly
|
||||
*
|
||||
* @package sapphiredocs
|
||||
* @subpackage tasks
|
||||
*/
|
||||
|
||||
class RebuildLuceneDocsIndex extends BuildTask {
|
||||
|
||||
/**
|
||||
* Builds the document index
|
||||
*
|
||||
* Perhaps we run this via a hourly / daily task rather than
|
||||
* based on the user. It's a
|
||||
*/
|
||||
protected $title = "Rebuild Documentation Search Indexes";
|
||||
|
||||
protected $description = "Rebuilds the indexes used for the search engine in sapphiredocs. Gathers all documentation files from your modules";
|
||||
|
||||
function run($request) {
|
||||
$this->rebuildIndexes();
|
||||
}
|
||||
|
||||
function rebuildIndexes($quiet = false) {
|
||||
require_once('../sapphiredocs/thirdparty/markdown/markdown.php');
|
||||
|
||||
ini_set("memory_limit", -1);
|
||||
ini_set('max_execution_time', 0);
|
||||
|
||||
|
||||
// only rebuild the index if we have to. Check for either flush or the time write.lock.file
|
||||
// was last altered
|
||||
$lock = DocumentationSearch::get_index_location() .'/write.lock.file';
|
||||
$lockFileFresh = (file_exists($lock) && filemtime($lock) > (time() - (60 * 60 * 24)));
|
||||
|
||||
if($lockFileFresh && !isset($_REQUEST['flush'])) return true;
|
||||
if($lockFileFresh && !isset($_REQUEST['flush'])) {
|
||||
if(!$quiet) {
|
||||
echo "Index recently rebuilt. If you want to force reindex use ?flush=1";
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
$index = Zend_Search_Lucene::open(DocumentationSearch::get_index_location());
|
||||
$index->removeReference();
|
||||
}
|
||||
catch (Zend_Search_Lucene_Exception $e) {
|
||||
|
||||
}
|
||||
catch (Zend_Search_Lucene_Exception $e) {}
|
||||
|
||||
try {
|
||||
$index = Zend_Search_Lucene::create(DocumentationSearch::get_index_location());
|
||||
@ -45,14 +56,15 @@ class RebuildLuceneDocsIndex extends BuildTask {
|
||||
|
||||
if($pages) {
|
||||
$count = 0;
|
||||
|
||||
// iconv complains about all the markdown formatting
|
||||
// turn off notices while we parse
|
||||
$error = error_reporting();
|
||||
error_reporting('E_ALL ^ E_NOTICE');
|
||||
|
||||
foreach($pages as $page) {
|
||||
$count++;
|
||||
|
||||
// iconv complains about all the markdown formatting
|
||||
// turn off notices while we parse
|
||||
$error = error_reporting();
|
||||
error_reporting('E_ALL ^ E_NOTICE');
|
||||
|
||||
if(!is_dir($page->getPath())) {
|
||||
$doc = new Zend_Search_Lucene_Document();
|
||||
$content = $page->getMarkdown();
|
||||
@ -65,11 +77,28 @@ class RebuildLuceneDocsIndex extends BuildTask {
|
||||
$doc->addField(Zend_Search_Lucene_Field::Keyword('Link', $page->Link()));
|
||||
$index->addDocument($doc);
|
||||
}
|
||||
|
||||
error_reporting($error);
|
||||
|
||||
if(!$quiet) echo "adding ". $page->getTitle() ."\n\n";
|
||||
}
|
||||
|
||||
error_reporting($error);
|
||||
}
|
||||
|
||||
$index->commit();
|
||||
|
||||
if(!$quiet) echo "complete.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @package sapphiredocs
|
||||
* @subpackage tasks
|
||||
*/
|
||||
class RebuildLuceneDocusIndex_Hourly extends HourlyTask {

	/**
	 * Rebuilds the documentation search index from the hourly cron run.
	 *
	 * Delegates to RebuildLuceneDocsIndex::rebuildIndexes() in quiet mode so
	 * that no progress output is echoed.
	 *
	 * The class name keeps its historical "Docus" spelling so that existing
	 * references to this task continue to resolve.
	 */
	function process() {
		// The build task class is RebuildLuceneDocsIndex. The previously used
		// name "RebuildLuceneDocusIndex" does not exist and caused a fatal
		// "class not found" error whenever the hourly task ran.
		$reindex = new RebuildLuceneDocsIndex();

		$reindex->rebuildIndexes(true);
	}
}
|
@ -11,6 +11,20 @@ page.
|
||||
|
||||
## Setup
|
||||
|
||||
### Enabling Search
|
||||
|
||||
The module provides automatic search functionality via [Lucene Search](http://lucene.apache.org/java/docs/index.html). To enable search
|
||||
you need to add the following to your application's _config.php file:
|
||||
|
||||
DocumentationSearch::enable();
|
||||
|
||||
After adding that line you will also need to build the search indexes. You can do so by running the `/dev/tasks/RebuildLuceneDocsIndex` task from your web browser or via sake.
|
||||
|
||||
|
||||
|
||||
|
||||
### Using a URL other than /dev/docs/
|
||||
|
||||
By default, the documentation is available in `dev/docs`. If you want it to live on the webroot instead of a subfolder,
|
||||
add the following configuration to your `mysite/_config.php`:
|
||||
|
||||
@ -18,4 +32,5 @@ add the following configuration to your `mysite/_config.php`:
|
||||
Director::addRules(1, array(
|
||||
'$Action' => 'DocumentationViewer',
|
||||
'' => 'DocumentationViewer'
|
||||
));
|
||||
));
|
||||
|
37
thirdparty/Zend/Search/Exception.php
vendored
Normal file
37
thirdparty/Zend/Search/Exception.php
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Exception.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Exception extends Zend_Exception
|
||||
{}
|
||||
|
1577
thirdparty/Zend/Search/Lucene.php
vendored
Normal file
1577
thirdparty/Zend/Search/Lucene.php
vendored
Normal file
@ -0,0 +1,1577 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Lucene.php 21640 2010-03-24 18:28:32Z alexander $
|
||||
*/
|
||||
|
||||
|
||||
/** User land classes and interfaces turned on by Zend/Search/Lucene.php file inclusion. */
|
||||
/** @todo Section should be removed with ZF 2.0 release as obsolete */
|
||||
|
||||
/** Zend_Search_Lucene_Document_Html */
|
||||
require_once 'Zend/Search/Lucene/Document/Html.php';
|
||||
|
||||
/** Zend_Search_Lucene_Document_Docx */
|
||||
require_once 'Zend/Search/Lucene/Document/Docx.php';
|
||||
|
||||
/** Zend_Search_Lucene_Document_Pptx */
|
||||
require_once 'Zend/Search/Lucene/Document/Pptx.php';
|
||||
|
||||
/** Zend_Search_Lucene_Document_Xlsx */
|
||||
require_once 'Zend/Search/Lucene/Document/Xlsx.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParser */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryHit */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryHit.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_MultiTerm */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Wildcard */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Range */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Range.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Fuzzy */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Boolean */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Empty */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Insignificant */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
|
||||
|
||||
|
||||
|
||||
/** Internally used classes */
|
||||
|
||||
/** Zend_Search_Lucene_Interface */
|
||||
require_once 'Zend/Search/Lucene/Interface.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_LockManager */
|
||||
require_once 'Zend/Search/Lucene/LockManager.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
|
||||
{
|
||||
/**
|
||||
* Default field name for search
|
||||
*
|
||||
* Null means search through all fields
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private static $_defaultSearchField = null;
|
||||
|
||||
/**
|
||||
* Result set limit
|
||||
*
|
||||
* 0 means no limit
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private static $_resultSetLimit = 0;
|
||||
|
||||
/**
|
||||
* Terms per query limit
|
||||
*
|
||||
* 0 means no limit
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private static $_termsPerQueryLimit = 1024;
|
||||
|
||||
/**
|
||||
* File system adapter.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_Directory
|
||||
*/
|
||||
private $_directory = null;
|
||||
|
||||
/**
|
||||
* File system adapter closing option
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_closeDirOnExit = true;
|
||||
|
||||
/**
|
||||
* Writer for this index, not instantiated unless required.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Index_Writer
|
||||
*/
|
||||
private $_writer = null;
|
||||
|
||||
/**
|
||||
* Array of Zend_Search_Lucene_Index_SegmentInfo objects for current version of index.
|
||||
*
|
||||
* @var array Zend_Search_Lucene_Index_SegmentInfo
|
||||
*/
|
||||
private $_segmentInfos = array();
|
||||
|
||||
/**
|
||||
* Number of documents in this index.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_docCount = 0;
|
||||
|
||||
/**
|
||||
* Flag for index changes
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_hasChanges = false;
|
||||
|
||||
|
||||
/**
|
||||
* Signal, that index is already closed, changes are fixed and resources are cleaned up
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_closed = false;
|
||||
|
||||
/**
|
||||
* Number of references to the index object
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_refCount = 0;
|
||||
|
||||
/**
|
||||
* Current segment generation
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_generation;
|
||||
|
||||
const FORMAT_PRE_2_1 = 0;
|
||||
const FORMAT_2_1 = 1;
|
||||
const FORMAT_2_3 = 2;
|
||||
|
||||
|
||||
/**
|
||||
* Index format version
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_formatVersion;
|
||||
|
||||
/**
|
||||
* Create index
|
||||
*
|
||||
* @param mixed $directory
|
||||
* @return Zend_Search_Lucene_Interface
|
||||
*/
|
||||
public static function create($directory)
|
||||
{
|
||||
/** Zend_Search_Lucene_Proxy */
|
||||
require_once 'Zend/Search/Lucene/Proxy.php';
|
||||
|
||||
return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Open index
|
||||
*
|
||||
* @param mixed $directory
|
||||
* @return Zend_Search_Lucene_Interface
|
||||
*/
|
||||
public static function open($directory)
|
||||
{
|
||||
/** Zend_Search_Lucene_Proxy */
|
||||
require_once 'Zend/Search/Lucene/Proxy.php';
|
||||
|
||||
return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
|
||||
}
|
||||
|
||||
/** Generation retrieving counter */
|
||||
const GENERATION_RETRIEVE_COUNT = 10;
|
||||
|
||||
/** Pause between generation retrieving attempts in milliseconds */
|
||||
const GENERATION_RETRIEVE_PAUSE = 50;
|
||||
|
||||
/**
|
||||
* Get current generation number
|
||||
*
|
||||
* Returns generation number
|
||||
* 0 means pre-2.1 index format
|
||||
* -1 means there are no segments files.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Storage_Directory $directory
|
||||
* @return integer
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
|
||||
{
|
||||
/**
|
||||
* Zend_Search_Lucene uses segments.gen file to retrieve current generation number
|
||||
*
|
||||
* Apache Lucene index format documentation mentions this method only as a fallback method
|
||||
*
|
||||
* Nevertheless we use it according to the performance considerations
|
||||
*
|
||||
* @todo check if we can use some modification of Apache Lucene generation determination algorithm
|
||||
* without performance problems
|
||||
*/
|
||||
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
|
||||
// Try to get generation file
|
||||
$genFile = $directory->getFileObject('segments.gen', false);
|
||||
|
||||
$format = $genFile->readInt();
|
||||
if ($format != (int)0xFFFFFFFE) {
|
||||
throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
|
||||
}
|
||||
|
||||
$gen1 = $genFile->readLong();
|
||||
$gen2 = $genFile->readLong();
|
||||
|
||||
if ($gen1 == $gen2) {
|
||||
return $gen1;
|
||||
}
|
||||
|
||||
usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
|
||||
}
|
||||
|
||||
// All passes are failed
|
||||
throw new Zend_Search_Lucene_Exception('Index is under processing now');
|
||||
} catch (Zend_Search_Lucene_Exception $e) {
|
||||
if (strpos($e->getMessage(), 'is not readable') !== false) {
|
||||
try {
|
||||
// Try to open old style segments file
|
||||
$segmentsFile = $directory->getFileObject('segments', false);
|
||||
|
||||
// It's pre-2.1 index
|
||||
return 0;
|
||||
} catch (Zend_Search_Lucene_Exception $e) {
|
||||
if (strpos($e->getMessage(), 'is not readable') !== false) {
|
||||
return -1;
|
||||
} else {
|
||||
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get generation number associated with this index instance
|
||||
*
|
||||
* The same generation number in pair with document number or query string
|
||||
* guarantees to give the same result while index retrieving.
|
||||
* So it may be used for search result caching.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getGeneration()
|
||||
{
|
||||
return $this->_generation;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get segments file name
|
||||
*
|
||||
* @param integer $generation
|
||||
* @return string
|
||||
*/
|
||||
public static function getSegmentFileName($generation)
|
||||
{
|
||||
if ($generation == 0) {
|
||||
return 'segments';
|
||||
}
|
||||
|
||||
return 'segments_' . base_convert($generation, 10, 36);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index format version
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getFormatVersion()
|
||||
{
|
||||
return $this->_formatVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set index format version.
|
||||
* Index is converted to this format at the nearest update time
|
||||
*
|
||||
* @param int $formatVersion
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function setFormatVersion($formatVersion)
|
||||
{
|
||||
if ($formatVersion != self::FORMAT_PRE_2_1 &&
|
||||
$formatVersion != self::FORMAT_2_1 &&
|
||||
$formatVersion != self::FORMAT_2_3) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unsupported index format');
|
||||
}
|
||||
|
||||
$this->_formatVersion = $formatVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read segments file for pre-2.1 Lucene index format
|
||||
*
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _readPre21SegmentsFile()
|
||||
{
|
||||
$segmentsFile = $this->_directory->getFileObject('segments');
|
||||
|
||||
$format = $segmentsFile->readInt();
|
||||
|
||||
if ($format != (int)0xFFFFFFFF) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong segments file format');
|
||||
}
|
||||
|
||||
// read version
|
||||
$segmentsFile->readLong();
|
||||
|
||||
// read segment name counter
|
||||
$segmentsFile->readInt();
|
||||
|
||||
$segments = $segmentsFile->readInt();
|
||||
|
||||
$this->_docCount = 0;
|
||||
|
||||
// read segmentInfos
|
||||
for ($count = 0; $count < $segments; $count++) {
|
||||
$segName = $segmentsFile->readString();
|
||||
$segSize = $segmentsFile->readInt();
|
||||
$this->_docCount += $segSize;
|
||||
|
||||
$this->_segmentInfos[$segName] =
|
||||
new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
|
||||
$segName,
|
||||
$segSize);
|
||||
}
|
||||
|
||||
// Use 2.1 as a target version. Index will be reorganized at update time.
|
||||
$this->_formatVersion = self::FORMAT_2_1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read segments file
|
||||
*
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _readSegmentsFile()
|
||||
{
|
||||
$segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
|
||||
|
||||
$format = $segmentsFile->readInt();
|
||||
|
||||
if ($format == (int)0xFFFFFFFC) {
|
||||
$this->_formatVersion = self::FORMAT_2_3;
|
||||
} else if ($format == (int)0xFFFFFFFD) {
|
||||
$this->_formatVersion = self::FORMAT_2_1;
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
|
||||
}
|
||||
|
||||
// read version
|
||||
$segmentsFile->readLong();
|
||||
|
||||
// read segment name counter
|
||||
$segmentsFile->readInt();
|
||||
|
||||
$segments = $segmentsFile->readInt();
|
||||
|
||||
$this->_docCount = 0;
|
||||
|
||||
// read segmentInfos
|
||||
for ($count = 0; $count < $segments; $count++) {
|
||||
$segName = $segmentsFile->readString();
|
||||
$segSize = $segmentsFile->readInt();
|
||||
|
||||
// 2.1+ specific properties
|
||||
$delGen = $segmentsFile->readLong();
|
||||
|
||||
if ($this->_formatVersion == self::FORMAT_2_3) {
|
||||
$docStoreOffset = $segmentsFile->readInt();
|
||||
|
||||
if ($docStoreOffset != (int)0xFFFFFFFF) {
|
||||
$docStoreSegment = $segmentsFile->readString();
|
||||
$docStoreIsCompoundFile = $segmentsFile->readByte();
|
||||
|
||||
$docStoreOptions = array('offset' => $docStoreOffset,
|
||||
'segment' => $docStoreSegment,
|
||||
'isCompound' => ($docStoreIsCompoundFile == 1));
|
||||
} else {
|
||||
$docStoreOptions = null;
|
||||
}
|
||||
} else {
|
||||
$docStoreOptions = null;
|
||||
}
|
||||
|
||||
$hasSingleNormFile = $segmentsFile->readByte();
|
||||
$numField = $segmentsFile->readInt();
|
||||
|
||||
$normGens = array();
|
||||
if ($numField != (int)0xFFFFFFFF) {
|
||||
for ($count1 = 0; $count1 < $numField; $count1++) {
|
||||
$normGens[] = $segmentsFile->readLong();
|
||||
}
|
||||
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
|
||||
}
|
||||
|
||||
$isCompoundByte = $segmentsFile->readByte();
|
||||
|
||||
if ($isCompoundByte == 0xFF) {
|
||||
// The segment is not a compound file
|
||||
$isCompound = false;
|
||||
} else if ($isCompoundByte == 0x00) {
|
||||
// The status is unknown
|
||||
$isCompound = null;
|
||||
} else if ($isCompoundByte == 0x01) {
|
||||
// The segment is a compound file
|
||||
$isCompound = true;
|
||||
}
|
||||
|
||||
$this->_docCount += $segSize;
|
||||
|
||||
$this->_segmentInfos[$segName] =
|
||||
new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
|
||||
$segName,
|
||||
$segSize,
|
||||
$delGen,
|
||||
$docStoreOptions,
|
||||
$hasSingleNormFile,
|
||||
$isCompound);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens the index.
|
||||
*
|
||||
* IndexReader constructor needs Directory as a parameter. It should be
|
||||
* a string with a path to the index folder or a Directory object.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Storage_Directory_Filesystem|string $directory
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function __construct($directory = null, $create = false)
|
||||
{
|
||||
if ($directory === null) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Exception('No index directory specified');
|
||||
}
|
||||
|
||||
if (is_string($directory)) {
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
|
||||
$this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
|
||||
$this->_closeDirOnExit = true;
|
||||
} else {
|
||||
$this->_directory = $directory;
|
||||
$this->_closeDirOnExit = false;
|
||||
}
|
||||
|
||||
$this->_segmentInfos = array();
|
||||
|
||||
// Mark index as "under processing" to prevent other processes from premature index cleaning
|
||||
Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
|
||||
|
||||
$this->_generation = self::getActualGeneration($this->_directory);
|
||||
|
||||
if ($create) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
|
||||
} catch (Zend_Search_Lucene_Exception $e) {
|
||||
Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
|
||||
|
||||
if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
|
||||
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
|
||||
} else {
|
||||
throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now', 0, $e);
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->_generation == -1) {
|
||||
// Directory doesn't contain existing index, start from 1
|
||||
$this->_generation = 1;
|
||||
$nameCounter = 0;
|
||||
} else {
|
||||
// Directory contains existing index
|
||||
$segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
|
||||
$segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
|
||||
|
||||
$nameCounter = $segmentsFile->readInt();
|
||||
$this->_generation++;
|
||||
}
|
||||
|
||||
require_once 'Zend/Search/Lucene/Index/Writer.php';
|
||||
Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
|
||||
|
||||
Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
|
||||
}
|
||||
|
||||
if ($this->_generation == -1) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
|
||||
} else if ($this->_generation == 0) {
|
||||
$this->_readPre21SegmentsFile();
|
||||
} else {
|
||||
$this->_readSegmentsFile();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close current index and free resources
|
||||
*/
|
||||
private function _close()
|
||||
{
|
||||
if ($this->_closed) {
|
||||
// index is already closed and resources are cleaned up
|
||||
return;
|
||||
}
|
||||
|
||||
$this->commit();
|
||||
|
||||
// Release "under processing" flag
|
||||
Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
|
||||
|
||||
if ($this->_closeDirOnExit) {
|
||||
$this->_directory->close();
|
||||
}
|
||||
|
||||
$this->_directory = null;
|
||||
$this->_writer = null;
|
||||
$this->_segmentInfos = null;
|
||||
|
||||
$this->_closed = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add reference to the index object
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public function addReference()
|
||||
{
|
||||
$this->_refCount++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove reference from the index object
|
||||
*
|
||||
* When reference count becomes zero, index is closed and resources are cleaned up
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public function removeReference()
|
||||
{
|
||||
$this->_refCount--;
|
||||
|
||||
if ($this->_refCount == 0) {
|
||||
$this->_close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Object destructor
|
||||
*/
|
||||
public function __destruct()
|
||||
{
|
||||
$this->_close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an instance of Zend_Search_Lucene_Index_Writer for the index
|
||||
*
|
||||
* @return Zend_Search_Lucene_Index_Writer
|
||||
*/
|
||||
private function _getIndexWriter()
|
||||
{
|
||||
if ($this->_writer === null) {
|
||||
require_once 'Zend/Search/Lucene/Index/Writer.php';
|
||||
$this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory,
|
||||
$this->_segmentInfos,
|
||||
$this->_formatVersion);
|
||||
}
|
||||
|
||||
return $this->_writer;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
|
||||
*
|
||||
* @return Zend_Search_Lucene_Storage_Directory
|
||||
*/
|
||||
public function getDirectory()
|
||||
{
|
||||
return $this->_directory;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the total number of documents in this index (including deleted documents).
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function count()
|
||||
{
|
||||
return $this->_docCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns one greater than the largest possible document number.
|
||||
* This may be used to, e.g., determine how big to allocate a structure which will have
|
||||
* an element for every document number in an index.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function maxDoc()
|
||||
{
|
||||
return $this->count();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of non-deleted documents in this index.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function numDocs()
|
||||
{
|
||||
$numDocs = 0;
|
||||
|
||||
foreach ($this->_segmentInfos as $segmentInfo) {
|
||||
$numDocs += $segmentInfo->numDocs();
|
||||
}
|
||||
|
||||
return $numDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the document is deleted
|
||||
*
|
||||
* @param integer $id
|
||||
* @return boolean
|
||||
* @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
|
||||
*/
|
||||
public function isDeleted($id)
|
||||
{
|
||||
if ($id >= $this->_docCount) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
|
||||
}
|
||||
|
||||
$segmentStartId = 0;
|
||||
foreach ($this->_segmentInfos as $segmentInfo) {
|
||||
if ($segmentStartId + $segmentInfo->count() > $id) {
|
||||
break;
|
||||
}
|
||||
|
||||
$segmentStartId += $segmentInfo->count();
|
||||
}
|
||||
|
||||
return $segmentInfo->isDeleted($id - $segmentStartId);
|
||||
}
|
||||
|
||||
/**
 * Sets the default search field.
 *
 * Null means that the search is performed through all fields by default.
 * Default value is null.
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    self::$_defaultSearchField = $fieldName;
}
|
||||
|
||||
/**
 * Gets the default search field.
 *
 * Null means that the search is performed through all fields by default.
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    return self::$_defaultSearchField;
}
|
||||
|
||||
/**
 * Sets the result set limit.
 *
 * 0 (default) means no limit.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    self::$_resultSetLimit = $limit;
}
|
||||
|
||||
/**
 * Gets the result set limit.
 *
 * 0 means no limit.
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    return self::$_resultSetLimit;
}
|
||||
|
||||
/**
 * Sets the terms per query limit.
 *
 * 0 means no limit.
 *
 * @param integer $limit
 */
public static function setTermsPerQueryLimit($limit)
{
    self::$_termsPerQueryLimit = $limit;
}
|
||||
|
||||
/**
 * Gets the terms per query limit.
 *
 * 0 (default) means no limit.
 *
 * @return integer
 */
public static function getTermsPerQueryLimit()
{
    return self::$_termsPerQueryLimit;
}
|
||||
|
||||
/**
 * Retrieves the index maxBufferedDocs option.
 *
 * maxBufferedDocs is the minimal number of documents required before
 * the buffered in-memory documents are written into a new segment.
 *
 * Default value is 10.
 *
 * @return integer
 */
public function getMaxBufferedDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxBufferedDocs;
}
|
||||
|
||||
/**
 * Sets the index maxBufferedDocs option.
 *
 * maxBufferedDocs is the minimal number of documents required before
 * the buffered in-memory documents are written into a new segment.
 *
 * Default value is 10.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    $writer = $this->_getIndexWriter();
    $writer->maxBufferedDocs = $maxBufferedDocs;
}
|
||||
|
||||
/**
 * Retrieves the index maxMergeDocs option.
 *
 * maxMergeDocs is the largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * @return integer
 */
public function getMaxMergeDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxMergeDocs;
}
|
||||
|
||||
/**
 * Sets the index maxMergeDocs option.
 *
 * maxMergeDocs is the largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    $writer = $this->_getIndexWriter();
    $writer->maxMergeDocs = $maxMergeDocs;
}
|
||||
|
||||
/**
 * Retrieves the index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing, and searches on
 * unoptimized indices are faster, but indexing speed is slower.
 * With larger values, more RAM is used during indexing, and while searches
 * on unoptimized indices are slower, indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * @return integer
 */
public function getMergeFactor()
{
    $writer = $this->_getIndexWriter();

    return $writer->mergeFactor;
}
|
||||
|
||||
/**
 * Sets the index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing, and searches on
 * unoptimized indices are faster, but indexing speed is slower.
 * With larger values, more RAM is used during indexing, and while searches
 * on unoptimized indices are slower, indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    $writer = $this->_getIndexWriter();
    $writer->mergeFactor = $mergeFactor;
}
|
||||
|
||||
/**
 * Runs a query against the index and returns the matching hits as an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 *
 * A string query is parsed with Zend_Search_Lucene_Search_QueryParser first.
 * Pending index changes are committed before the query is executed.
 *
 * When called with the query only, hits are ordered by score (descending),
 * then by document id (ascending). Any further arguments are read through
 * func_get_args() as sort instructions: a field name (or 'score'), optionally
 * followed by one or two integers that are handed to array_multisort() as
 * sort order / sort type for that field.
 *
 * If a result set limit is configured, collection stops as soon as that many
 * hits were gathered, i.e. before any sorting takes place.
 *
 * @param Zend_Search_Lucene_Search_Query|string $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    if (is_string($query)) {
        require_once 'Zend/Search/Lucene/Search/QueryParser.php';

        $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
    }

    if (!($query instanceof Zend_Search_Lucene_Search_Query)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
    }

    // Flush pending changes before searching
    $this->commit();

    // Three parallel lists: $hits[$i], $ids[$i] and $scores[$i] describe the same document
    $hits   = array();
    $scores = array();
    $ids    = array();

    $query = $query->rewrite($this)->optimize($this);
    $query->execute($this);

    $topScore = 0;

    /** Zend_Search_Lucene_Search_QueryHit */
    require_once 'Zend/Search/Lucene/Search/QueryHit.php';

    foreach ($query->matchedDocs() as $docId => $unused) {
        $docScore = $query->score($docId, $this);

        if ($docScore != 0) {
            $hit        = new Zend_Search_Lucene_Search_QueryHit($this);
            $hit->id    = $docId;
            $hit->score = $docScore;

            $hits[]   = $hit;
            $ids[]    = $docId;
            $scores[] = $docScore;

            if ($docScore > $topScore) {
                $topScore = $docScore;
            }
        }

        // The limit is evaluated on every iteration, matched or not
        if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
            break;
        }
    }

    if (empty($hits)) {
        // skip sorting, which may cause a error on empty index
        return array();
    }

    // Scale hit scores so that the best one is 1; $scores keeps the raw values,
    // which sort in the same order
    if ($topScore > 1) {
        foreach ($hits as $hit) {
            $hit->score /= $topScore;
        }
    }

    if (func_num_args() == 1) {
        // Default ordering: by score, ties broken by document id
        array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                        $ids,    SORT_ASC,  SORT_NUMERIC,
                        $hits);

        return $hits;
    }

    // Ordering by caller-supplied field names
    $argList    = func_get_args();
    $argTotal   = count($argList);
    $fieldNames = $this->getFieldNames();
    $sortArgs   = array();

    // PHP 5.3 now expects all arguments to array_multisort be passed by
    // reference (if it's invoked through call_user_func_array());
    // since constants can't be passed by reference, create some placeholder variables.
    $sortReg = SORT_REGULAR;
    $sortAsc = SORT_ASC;
    $sortNum = SORT_NUMERIC;

    $sortFieldValues = array();

    require_once 'Zend/Search/Lucene/Exception.php';
    for ($argPos = 1; $argPos < $argTotal; $argPos++) {
        $fieldName = $argList[$argPos];

        if (!is_string($fieldName)) {
            throw new Zend_Search_Lucene_Exception('Field name must be a string.');
        }

        if (strtolower($fieldName) == 'score') {
            $sortArgs[] = &$scores;
        } else {
            if (!in_array($fieldName, $fieldNames)) {
                throw new Zend_Search_Lucene_Exception('Wrong field name.');
            }

            if (!isset($sortFieldValues[$fieldName])) {
                // Load the value of this field for every hit (null when the field is absent)
                $valuesArray = array();
                foreach ($hits as $hit) {
                    try {
                        $value = $hit->getDocument()->getFieldValue($fieldName);
                    } catch (Zend_Search_Lucene_Exception $e) {
                        if (strpos($e->getMessage(), 'not found') === false) {
                            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                        } else {
                            $value = null;
                        }
                    }

                    $valuesArray[] = $value;
                }

                // Collect loaded values in $sortFieldValues
                // Required for PHP 5.3 which translates references into values when source
                // variable is destroyed
                $sortFieldValues[$fieldName] = $valuesArray;
            }

            $sortArgs[] = &$sortFieldValues[$fieldName];
        }

        if ($argPos + 1 < $argTotal && is_integer($argList[$argPos + 1])) {
            // First integer after the field name is passed through as given
            $argPos++;
            $sortArgs[] = &$argList[$argPos];

            if ($argPos + 1 < $argTotal && is_integer($argList[$argPos + 1])) {
                // Second integer is passed through as well
                $argPos++;
                $sortArgs[] = &$argList[$argPos];
            } else {
                // Only one integer given: supply the missing counterpart
                if ($argList[$argPos] == SORT_ASC || $argList[$argPos] == SORT_DESC) {
                    $sortArgs[] = &$sortReg;
                } else {
                    $sortArgs[] = &$sortAsc;
                }
            }
        } else {
            // No integers given: ascending, regular comparison
            $sortArgs[] = &$sortAsc;
            $sortArgs[] = &$sortReg;
        }
    }

    // Sort by id's if values are equal
    $sortArgs[] = &$ids;
    $sortArgs[] = &$sortAsc;
    $sortArgs[] = &$sortNum;

    // Array to be sorted
    $sortArgs[] = &$hits;

    // Do sort
    call_user_func_array('array_multisort', $sortArgs);

    return $hits;
}
|
||||
|
||||
|
||||
/**
 * Returns a list of all unique field names that exist in this index.
 *
 * The per-segment field lists returned by getFields() are combined with
 * array_merge(), one segment after another.
 *
 * @param boolean $indexed  passed through to each segment's getFields()
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $names = array();

    foreach ($this->_segmentInfos as $segment) {
        $segmentFields = $segment->getFields($indexed);
        $names         = array_merge($names, $segmentFields);
    }

    return $names;
}
|
||||
|
||||
|
||||
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * The index-wide id is mapped to its segment, the position of the stored
 * field data is looked up in that segment's '.fdx' file (one 8-byte entry
 * per document) and the fields are then read from the '.fdt' file.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Reject negative ids too: they would otherwise turn into a negative
    // seek offset in the '.fdx' file below.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Find the segment that holds $id and the id of its first document
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }

    $fdxFile = $segmentInfo->openCompoundFile('.fdx');
    $fdxFile->seek(($id - $segmentStartId) * 8, SEEK_CUR);
    $fieldValuesPosition = $fdxFile->readLong();

    $fdtFile = $segmentInfo->openCompoundFile('.fdt');
    $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
    $fieldCount = $fdtFile->readVInt();

    $doc = new Zend_Search_Lucene_Document();
    for ($count = 0; $count < $fieldCount; $count++) {
        $fieldNum = $fdtFile->readVInt();
        $bits     = $fdtFile->readByte();

        $fieldInfo = $segmentInfo->getField($fieldNum);

        if (!($bits & 2)) { // Text data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readString(),
                                                  'UTF-8',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1);
        } else {            // Binary data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readBinary(),
                                                  '',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1,
                                                  true);
        }

        $doc->addField($field);
    }

    return $doc;
}
|
||||
|
||||
|
||||
/**
 * Returns true if the index contains documents with the specified term.
 *
 * Is used for query optimization. The segments are probed in order and the
 * search stops at the first one that has term info for $term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    foreach ($this->_segmentInfos as $segment) {
        $termInfo = $segment->getTermInfo($term);

        if ($termInfo !== null) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Returns IDs of all documents containing term.
 *
 * Each segment is queried with the index-wide id of its first document,
 * and the per-segment results are then combined.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $perSegment = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $perSegment[] = $segment->termDocs($term, $firstDocId, $docsFilter);

        $firstDocId += $segment->count();
    }

    switch (count($perSegment)) {
        case 0:
            return array();

        case 1:
            // Index is optimized (only one segment)
            // Do not perform array reindexing
            return reset($perSegment);

        default:
            return call_user_func_array('array_merge', $perSegment);
    }
}
|
||||
|
||||
/**
 * Returns the documents containing term, collected the same way termDocs() does.
 *
 * NOTE(review): the method name and the original documentation announce a
 * Zend_Search_Lucene_Index_DocsFilter result, but the value actually returned
 * is whatever the segments' termDocs() calls produce (merged with array_merge()
 * when there are several segments), or an empty array. The return type is kept
 * as is so existing callers are not affected.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    // Must be initialised: with no segments the loop never runs and count()
    // would otherwise be called on an undefined variable.
    $subResults        = array();
    $segmentStartDocId = 0;

    foreach ($this->_segmentInfos as $segmentInfo) {
        $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);

        $segmentStartDocId += $segmentInfo->count();
    }

    if (count($subResults) == 0) {
        return array();
    } else if (count($subResults) == 1) {
        // Index is optimized (only one segment)
        // Do not perform array reindexing
        return reset($subResults);
    }

    return call_user_func_array('array_merge', $subResults);
}
|
||||
|
||||
|
||||
/**
 * Returns an array of all term freqs.
 * Result array structure: array(docId => freq, ...)
 *
 * Per-segment results are combined with the array union operator, so an
 * entry that is already present is never overwritten by a later segment.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $freqs      = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $freqs = $freqs + $segment->termFreqs($term, $firstDocId, $docsFilter);

        $firstDocId += $segment->count();
    }

    return $freqs;
}
|
||||
|
||||
/**
 * Returns an array of all term positions in the documents.
 * Result array structure: array(docId => array(pos1, pos2, ...), ...)
 *
 * Per-segment results are combined with the array union operator, so an
 * entry that is already present is never overwritten by a later segment.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $positions  = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $positions = $positions + $segment->termPositions($term, $firstDocId, $docsFilter);

        $firstDocId += $segment->count();
    }

    return $positions;
}
|
||||
|
||||
|
||||
/**
 * Returns the number of documents in this index containing the $term.
 *
 * Adds up the docFreq of the term info from every segment that knows the term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $termInfo = $segment->getTermInfo($term);

        if ($termInfo === null) {
            // Term does not occur in this segment
            continue;
        }

        $total += $termInfo->docFreq;
    }

    return $total;
}
|
||||
|
||||
|
||||
/**
 * Retrieves the similarity used by the index reader.
 *
 * This is always the default Zend_Search_Lucene_Search_Similarity instance.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    /** Zend_Search_Lucene_Search_Similarity */
    require_once 'Zend/Search/Lucene/Search/Similarity.php';

    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
|
||||
|
||||
|
||||
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * Returns null when $id is not below the document counter and 0 when the
 * document is marked as deleted in its segment.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float
 */
public function norm($id, $fieldName)
{
    if ($id >= $this->_docCount) {
        return null;
    }

    // Locate the segment that contains $id
    $firstDocId = 0;
    foreach ($this->_segmentInfos as $segInfo) {
        if ($firstDocId + $segInfo->count() > $id) {
            break;
        }

        $firstDocId += $segInfo->count();
    }

    $localId = $id - $firstDocId;

    return $segInfo->isDeleted($localId)
         ? 0
         : $segInfo->norm($localId, $fieldName);
}
|
||||
|
||||
/**
 * Returns true if any documents have been deleted from this index.
 *
 * Stops at the first segment that reports deletions.
 *
 * @return boolean
 */
public function hasDeletions()
{
    foreach ($this->_segmentInfos as $segment) {
        $segmentHasDeletions = $segment->hasDeletions();

        if ($segmentHasDeletions) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
|
||||
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * The id is mapped to its segment, the segment is asked to delete the
 * document, and the index is flagged as changed so that commit() has work to do.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 */
public function delete($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Reject negative ids as well, so that a bad id can never be forwarded
    // to a segment as a negative local document number.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            // $id falls inside this segment
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }
    $segmentInfo->delete($id - $segmentStartId);

    $this->_hasChanges = true;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Adds a document to this index.
 *
 * The document is handed to the index writer, the document counter is
 * incremented and the index is flagged as changed.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $writer = $this->_getIndexWriter();
    $writer->addDocument($document);

    $this->_docCount++;
    $this->_hasChanges = true;
}
|
||||
|
||||
|
||||
/**
 * Recomputes the document counter as the sum of count() over all segments.
 */
private function _updateDocCount()
{
    $this->_docCount = 0;

    foreach ($this->_segmentInfos as $segment) {
        $this->_docCount += $segment->count();
    }
}
|
||||
|
||||
/**
 * Commit changes resulting from delete() or undeleteAll() operations.
 *
 * Does nothing unless the index has been flagged as changed. Otherwise the
 * index writer commits, the document counter is refreshed and the flag is cleared.
 *
 * @todo undeleteAll processing.
 */
public function commit()
{
    if (!$this->_hasChanges) {
        return;
    }

    $this->_getIndexWriter()->commit();
    $this->_updateDocCount();

    $this->_hasChanges = false;
}
|
||||
|
||||
|
||||
/**
 * Optimize index.
 *
 * Merges all segments into one. Pending changes are committed first; the
 * writer is only asked to optimize when there is more than one segment or
 * when some segment has deletions.
 */
public function optimize()
{
    // Commit changes if any changes have been made
    $this->commit();

    $needsOptimization = count($this->_segmentInfos) > 1 || $this->hasDeletions();

    if ($needsOptimization) {
        $this->_getIndexWriter()->optimize();
        $this->_updateDocCount();
    }
}
|
||||
|
||||
|
||||
/**
 * Returns an array of all terms in this index.
 *
 * The term streams of all segments are merged through a priority queue;
 * a term is emitted only when the next queue entry does not carry the same
 * term key, so a term present in several segments appears once.
 *
 * @return array
 */
public function terms()
{
    /** Zend_Search_Lucene_Index_TermsPriorityQueue */
    require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

    $queue    = new Zend_Search_Lucene_Index_TermsPriorityQueue();
    $allTerms = array();

    foreach ($this->_segmentInfos as $segment) {
        $segment->resetTermsStream();

        // Skip "empty" segments
        if ($segment->currentTerm() !== null) {
            $queue->put($segment);
        }
    }

    while (($segment = $queue->pop()) !== null) {
        $isNewTerm = $queue->top() === null
                  || $queue->top()->currentTerm()->key() != $segment->currentTerm()->key();

        if ($isNewTerm) {
            // We got new term
            $allTerms[] = $segment->currentTerm();
        }

        if ($segment->nextTerm() !== null) {
            // Put segment back into the priority queue
            $queue->put($segment);
        }
    }

    return $allTerms;
}
|
||||
|
||||
|
||||
/**
|
||||
* Terms stream priority queue object
|
||||
*
|
||||
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
|
||||
*/
|
||||
private $_termsStream = null;
|
||||
|
||||
/**
 * Reset terms stream.
 *
 * An existing stream object is reset in place; otherwise a new
 * Zend_Search_Lucene_TermStreamsPriorityQueue is created over the segments.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    /** Zend_Search_Lucene_TermStreamsPriorityQueue */
    require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
}
|
||||
|
||||
/**
 * Skip terms stream up to the specified term prefix.
 *
 * Prefix contains fully specified field info and portion of searched term.
 * The call is forwarded to the current terms stream object.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $stream = $this->_termsStream;
    $stream->skipTo($prefix);
}
|
||||
|
||||
/**
 * Scans terms dictionary and returns next term.
 *
 * The call is forwarded to the current terms stream object.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $stream = $this->_termsStream;

    return $stream->nextTerm();
}
|
||||
|
||||
/**
 * Returns term in current position.
 *
 * The call is forwarded to the current terms stream object.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $stream = $this->_termsStream;

    return $stream->currentTerm();
}
|
||||
|
||||
/**
 * Close terms stream.
 *
 * Should be used for resources clean up if stream is not read up to the end.
 * The stream object is closed and the property is set back to null.
 */
public function closeTermsStream()
{
    $stream = $this->_termsStream;
    $stream->closeTermsStream();

    $this->_termsStream = null;
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
@todo UNIMPLEMENTED
|
||||
*************************************************************************/
|
||||
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * Not implemented: the method body is empty.
 *
 * @todo Implementation
 */
public function undeleteAll()
{
}
|
||||
}
|
175
thirdparty/Zend/Search/Lucene/Analysis/Analyzer.php
vendored
Normal file
175
thirdparty/Zend/Search/Lucene/Analysis/Analyzer.php
vendored
Normal file
@ -0,0 +1,175 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Analyzer.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Load the bundled analyzer implementations when this file is included directly.
 *
 * The whole section is skipped once ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED
 * has been defined.
 *
 * @todo Section should be removed with ZF 2.0 release as obsolete
 */
if (!defined('ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED')) {
    // UTF-8 analyzers
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php';

    // UTF-8 analyzers that also accept digits
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php';

    // Text analyzers
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';

    // Text analyzers that also accept digits
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
    require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php';
}
|
||||
|
||||
|
||||
/**
 * An Analyzer is used to analyze text.
 * It thus represents a policy for extracting index terms from text.
 *
 * Note:
 * The Lucene Java implementation is oriented to streams. It provides effective
 * work with huge documents (more than 20Mb).
 * But the engine itself is not oriented to such documents.
 * Thus the Zend_Search_Lucene analysis API works with data strings and sets (arrays).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * The Analyzer implementation used by default.
     *
     * @var Zend_Search_Lucene_Analysis_Analyzer
     */
    private static $_defaultImpl;

    /**
     * Input string
     *
     * @var string
     */
    protected $_input = null;

    /**
     * Input string encoding
     *
     * @var string
     */
    protected $_encoding = '';

    /**
     * Tokenizes text into terms.
     * Returns an array of Zend_Search_Lucene_Analysis_Token objects.
     *
     * The input is installed with setInput() and nextToken() is then called
     * until it returns null.
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
     *
     * @param string $data
     * @param string $encoding
     * @return array
     */
    public function tokenize($data, $encoding = '')
    {
        $this->setInput($data, $encoding);

        $tokens = array();
        for ($token = $this->nextToken(); $token !== null; $token = $this->nextToken()) {
            $tokens[] = $token;
        }

        return $tokens;
    }

    /**
     * Tokenization stream API
     * Set input
     *
     * Stores the data and its encoding, then resets the token stream.
     *
     * @param string $data
     * @param string $encoding
     */
    public function setInput($data, $encoding = '')
    {
        $this->_input    = $data;
        $this->_encoding = $encoding;

        $this->reset();
    }

    /**
     * Reset token stream
     */
    abstract public function reset();

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    abstract public function nextToken();

    /**
     * Set the default Analyzer implementation used by indexing code.
     *
     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
     */
    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
    {
        self::$_defaultImpl = $analyzer;
    }

    /**
     * Return the default Analyzer implementation used by indexing code.
     *
     * When no analyzer has been set, a
     * Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive
     * instance is created, remembered and returned.
     *
     * @return Zend_Search_Lucene_Analysis_Analyzer
     */
    public static function getDefault()
    {
        /** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
        require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';

        if (self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
            return self::$_defaultImpl;
        }

        self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();

        return self::$_defaultImpl;
    }
}
|
||||
|
92
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common.php
vendored
Normal file
92
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common.php
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Common.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Define constant used to provide correct file processing order */
|
||||
/** @todo Section should be removed with ZF 2.0 release as obsolete */
|
||||
define('ZEND_SEARCH_LUCENE_COMMON_ANALYZER_PROCESSED', true);
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Token */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Token.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
|
||||
/**
|
||||
* Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
|
||||
* There are several standard subclasses provided by Zend_Search_Lucene/Analysis
|
||||
* subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
|
||||
*
|
||||
* @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * Registered token filters, kept in registration order.
     * Every element is a Zend_Search_Lucene_Analysis_TokenFilter.
     *
     * @var array
     */
    private $_filters = array();

    /**
     * Register a token filter.
     *
     * The filter is appended after the ones already registered, so filters
     * run in the order in which they were added.
     *
     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
     */
    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
    {
        array_push($this->_filters, $filter);
    }

    /**
     * Run a token through every registered filter in turn.
     *
     * Each filter receives the result of the previous one. Processing stops
     * as soon as a filter returns null, and null is returned to the caller.
     *
     * @param Zend_Search_Lucene_Analysis_Token $token
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
    {
        $current = $token;

        foreach ($this->_filters as $tokenFilter) {
            $current = $tokenFilter->normalize($current);

            if ($current === null) {
                // The filter removed the token; later filters are not consulted.
                break;
            }
        }

        return $current;
    }
}
|
||||
|
96
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
vendored
Normal file
96
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Text.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Offset in the input from which the next token search starts
     *
     * @var integer
     */
    private $_position;

    /**
     * Restart the token stream from the beginning of the input.
     *
     * When input is present it is transliterated to ASCII and the stored
     * encoding is switched to 'ASCII'.
     */
    public function reset()
    {
        $this->_position = 0;

        if ($this->_input !== null) {
            // The conversion is skipped on AIX (presumably because its iconv lacks
            // //TRANSLIT support - not established by this file); the encoding is
            // nevertheless recorded as ASCII on every platform.
            if (PHP_OS != 'AIX') {
                $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
            }

            $this->_encoding = 'ASCII';
        }
    }

    /**
     * Tokenization stream API
     * Fetch the next run of ASCII letters that survives the token filters.
     * Returns null once the stream is exhausted (or when no input is set).
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        while (true) {
            $found = preg_match('/[a-zA-Z]+/', $this->_input, $hit, PREG_OFFSET_CAPTURE, $this->_position);

            // Falsy covers both "no match" (0) and "error occurred" (false).
            if (!$found) {
                return null;
            }

            list($word, $start) = $hit[0];
            $end = $start + strlen($word);

            // The next search continues right after this word.
            $this->_position = $end;

            $candidate = $this->normalize(new Zend_Search_Lucene_Analysis_Token($word, $start, $end));

            if ($candidate !== null) {
                return $candidate;
            }
            // A filter dropped the token: keep scanning for the next word.
        }
    }
}
|
||||
|
47
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
vendored
Normal file
47
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: CaseInsensitive.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
{
    /**
     * Build a Text analyzer with the LowerCase token filter pre-registered.
     */
    public function __construct()
    {
        $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();

        $this->addFilter($lowerCase);
    }
}
|
||||
|
95
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php
vendored
Normal file
95
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php
vendored
Normal file
@ -0,0 +1,95 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: TextNum.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Offset in the input from which the next token search starts
     *
     * @var integer
     */
    private $_position;

    /**
     * Restart the token stream from the beginning of the input.
     *
     * When input is present it is transliterated to ASCII and the stored
     * encoding is switched to 'ASCII'.
     */
    public function reset()
    {
        $this->_position = 0;

        if ($this->_input !== null) {
            // The conversion is skipped on AIX (presumably because its iconv lacks
            // //TRANSLIT support - not established by this file); the encoding is
            // nevertheless recorded as ASCII on every platform.
            if (PHP_OS != 'AIX') {
                $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
            }

            $this->_encoding = 'ASCII';
        }
    }

    /**
     * Tokenization stream API
     * Fetch the next run of ASCII letters and digits that survives the token filters.
     * Returns null once the stream is exhausted (or when no input is set).
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        while (true) {
            $found = preg_match('/[a-zA-Z0-9]+/', $this->_input, $hit, PREG_OFFSET_CAPTURE, $this->_position);

            // Falsy covers both "no match" (0) and "error occurred" (false).
            if (!$found) {
                return null;
            }

            list($word, $start) = $hit[0];
            $end = $start + strlen($word);

            // The next search continues right after this word.
            $this->_position = $end;

            $candidate = $this->normalize(new Zend_Search_Lucene_Analysis_Token($word, $start, $end));

            if ($candidate !== null) {
                return $candidate;
            }
            // A filter dropped the token: keep scanning for the next word.
        }
    }
}
|
||||
|
47
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php
vendored
Normal file
47
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: CaseInsensitive.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_TextNum
{
    /**
     * Build a TextNum analyzer with the LowerCase token filter pre-registered.
     */
    public function __construct()
    {
        $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();

        $this->addFilter($lowerCase);
    }
}
|
||||
|
126
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php
vendored
Normal file
126
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php
vendored
Normal file
@ -0,0 +1,126 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Utf8.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Current char position in an UTF-8 stream
     *
     * @var integer
     */
    private $_position;

    /**
     * Current binary (byte) position in an UTF-8 stream
     *
     * @var integer
     */
    private $_bytePosition;

    /**
     * Object constructor
     *
     * Probes PCRE with a Unicode property pattern and refuses to construct
     * the analyzer when that probe does not match.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct()
    {
        if (@preg_match('/\pL/u', 'a') != 1) {
            // PCRE unicode support is turned off
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Utf8 analyzer needs PCRE unicode support to be enabled.');
        }
    }

    /**
     * Reset token stream
     *
     * Rewinds both position counters and, when input is present and its
     * declared encoding is not UTF-8, converts the input to UTF-8.
     */
    public function reset()
    {
        $this->_position     = 0;
        $this->_bytePosition = 0;

        // Nothing to convert when no input has been set. Without this guard
        // iconv() would be called with null, replacing the "no input" state
        // that nextToken() checks for (and raising a deprecation on PHP 8.1+).
        if ($this->_input === null) {
            return;
        }

        // convert input into UTF-8
        if (strcasecmp($this->_encoding, 'utf8') != 0 &&
            strcasecmp($this->_encoding, 'utf-8') != 0) {
            $this->_input    = iconv($this->_encoding, 'UTF-8', $this->_input);
            $this->_encoding = 'UTF-8';
        }
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Token offsets are expressed in characters, not bytes.
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        do {
            if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
                // It covers both cases a) there are no matches (preg_match(...) === 0)
                // b) error occurred (preg_match(...) === FALSE)
                return null;
            }

            // matched string
            $matchedWord = $match[0][0];

            // binary position of the matched word in the input stream
            $binStartPos = $match[0][1];

            // text skipped between the end of the previous match and this one
            $skipped = substr($this->_input, $this->_bytePosition, $binStartPos - $this->_bytePosition);

            // character position of the matched word in the input stream
            $startPos = $this->_position + iconv_strlen($skipped, 'UTF-8');

            // character position of the end of matched word in the input stream
            $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');

            // advance both cursors past the matched word
            $this->_bytePosition = $binStartPos + strlen($matchedWord);
            $this->_position     = $endPos;

            $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
        } while ($token === null); // try again if token is skipped

        return $token;
    }
}
|
||||
|
49
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php
vendored
Normal file
49
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: CaseInsensitive.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8 */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8
{
    /**
     * Build a Utf8 analyzer with the LowerCaseUtf8 token filter pre-registered.
     *
     * The parent constructor runs first, so its PCRE unicode check applies
     * before the filter is created.
     */
    public function __construct()
    {
        parent::__construct();

        $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8();

        $this->addFilter($lowerCase);
    }
}
|
||||
|
126
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php
vendored
Normal file
126
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php
vendored
Normal file
@ -0,0 +1,126 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Utf8Num.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Current char position in an UTF-8 stream
     *
     * @var integer
     */
    private $_position;

    /**
     * Current binary (byte) position in an UTF-8 stream
     *
     * @var integer
     */
    private $_bytePosition;

    /**
     * Object constructor
     *
     * Probes PCRE with a Unicode property pattern and refuses to construct
     * the analyzer when that probe does not match.
     *
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct()
    {
        if (@preg_match('/\pL/u', 'a') != 1) {
            // PCRE unicode support is turned off
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Utf8Num analyzer needs PCRE unicode support to be enabled.');
        }
    }

    /**
     * Reset token stream
     *
     * Rewinds both position counters and, when input is present and its
     * declared encoding is not UTF-8, converts the input to UTF-8.
     */
    public function reset()
    {
        $this->_position     = 0;
        $this->_bytePosition = 0;

        // Nothing to convert when no input has been set. Without this guard
        // iconv() would be called with null, replacing the "no input" state
        // that nextToken() checks for (and raising a deprecation on PHP 8.1+).
        if ($this->_input === null) {
            return;
        }

        // convert input into UTF-8
        if (strcasecmp($this->_encoding, 'utf8') != 0 &&
            strcasecmp($this->_encoding, 'utf-8') != 0) {
            $this->_input    = iconv($this->_encoding, 'UTF-8', $this->_input);
            $this->_encoding = 'UTF-8';
        }
    }

    /**
     * Tokenization stream API
     * Get next token
     * Returns null at the end of stream
     *
     * Token offsets are expressed in characters, not bytes.
     *
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function nextToken()
    {
        if ($this->_input === null) {
            return null;
        }

        do {
            if (! preg_match('/[\p{L}\p{N}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
                // It covers both cases a) there are no matches (preg_match(...) === 0)
                // b) error occurred (preg_match(...) === FALSE)
                return null;
            }

            // matched string
            $matchedWord = $match[0][0];

            // binary position of the matched word in the input stream
            $binStartPos = $match[0][1];

            // text skipped between the end of the previous match and this one
            $skipped = substr($this->_input, $this->_bytePosition, $binStartPos - $this->_bytePosition);

            // character position of the matched word in the input stream
            $startPos = $this->_position + iconv_strlen($skipped, 'UTF-8');

            // character position of the end of matched word in the input stream
            $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');

            // advance both cursors past the matched word
            $this->_bytePosition = $binStartPos + strlen($matchedWord);
            $this->_position     = $endPos;

            $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($matchedWord, $startPos, $endPos));
        } while ($token === null); // try again if token is skipped

        return $token;
    }
}
|
||||
|
49
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php
vendored
Normal file
49
thirdparty/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: CaseInsensitive.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
class Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num
{
    /**
     * Build a Utf8Num analyzer with the LowerCaseUtf8 token filter pre-registered.
     *
     * The parent constructor runs first, so its PCRE unicode check applies
     * before the filter is created.
     */
    public function __construct()
    {
        parent::__construct();

        $lowerCase = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8();

        $this->addFilter($lowerCase);
    }
}
|
||||
|
154
thirdparty/Zend/Search/Lucene/Analysis/Token.php
vendored
Normal file
154
thirdparty/Zend/Search/Lucene/Analysis/Token.php
vendored
Normal file
@ -0,0 +1,154 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Token.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * Term text carried by this token.
     *
     * @var string
     */
    private $_termText;

    /**
     * Offset in the source text at which the token starts.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * Offset in the source text at which the token ends.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Position of this token relative to the previous token.
     *
     * Tokens are created with an increment of one.
     *
     * Typical uses of other values:
     *
     *  - Zero places several terms at the same position, e.g. when a word has
     *    multiple stems. Phrase searches including either stem then match.
     *    Only the first stem keeps an increment of one; the others use zero.
     *    Repeating a token with an increment of zero can also be used to
     *    boost the scores of matches on that token.
     *
     *  - Values greater than one inhibit exact phrase matches. A stop word
     *    filter could, for example, remove stop words and set the increment
     *    of the following token to the number of words removed, so that exact
     *    phrase queries only match when no stop words intervene.
     *
     * @var integer
     */
    private $_positionIncrement;

    /**
     * Create a token with a position increment of one.
     *
     * @param string  $text  term text
     * @param integer $start start offset in the source text
     * @param integer $end   end offset in the source text
     */
    public function __construct($text, $start, $end)
    {
        $this->_positionIncrement = 1;

        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;
    }

    /**
     * Replace the position increment of this token.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }

    /**
     * Position increment of this token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Term text of this token.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Start offset: the position in the source text of the first character
     * corresponding to this token.
     *
     * Note: getEndOffset() - getStartOffset() need not equal
     * strlen(getTermText()), because the term text may have been altered by a
     * stemmer or some other filter.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * End offset: one greater than the position in the source text of the
     * last character corresponding to this token.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }
}
|
||||
|
47
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter.php
vendored
Normal file
47
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter.php
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: TokenFilter.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Token */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Token.php';
|
||||
|
||||
|
||||
/**
 * Base class for token filters. A token filter either converts (normalizes)
 * a Token or removes it from the token stream.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Normalize the given Token, or signal its removal by returning null.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken  Token to process
     * @return Zend_Search_Lucene_Analysis_Token  Resulting token (null if the token is removed)
     */
    abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken);
}
|
||||
|
58
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
vendored
Normal file
58
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: LowerCase.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
|
||||
/**
 * Token filter that lower-cases the term text using strtolower().
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Build a new Token carrying the lower-cased term text of the source token.
     *
     * Offsets and position increment are copied from the source token; this
     * filter never returns null.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken  Token to lower-case
     * @return Zend_Search_Lucene_Analysis_Token  Newly created lower-cased token
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
    {
        $loweredText = strtolower($srcToken->getTermText());
        $startOffset = $srcToken->getStartOffset();
        $endOffset   = $srcToken->getEndOffset();

        $result = new Zend_Search_Lucene_Analysis_Token($loweredText, $startOffset, $endOffset);
        $result->setPositionIncrement($srcToken->getPositionIncrement());

        return $result;
    }
}
|
||||
|
70
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php
vendored
Normal file
70
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: LowerCaseUtf8.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
|
||||
/**
 * Token filter that lower-cases UTF-8 term text using mb_strtolower().
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_TokenFilter_LowerCaseUtf8 extends Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Object constructor
     *
     * @throws Zend_Search_Lucene_Exception  When mb_strtolower() is not available
     */
    public function __construct()
    {
        if (function_exists('mb_strtolower')) {
            return;
        }

        // mbstring extension is disabled
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Utf8 compatible lower case filter needs mbstring extension to be enabled.');
    }

    /**
     * Build a new Token carrying the lower-cased (UTF-8) term text of the source token.
     *
     * Offsets and position increment are copied from the source token; this
     * filter never returns null.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken  Token to lower-case
     * @return Zend_Search_Lucene_Analysis_Token  Newly created lower-cased token
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
    {
        $loweredText = mb_strtolower($srcToken->getTermText(), 'UTF-8');
        $startOffset = $srcToken->getStartOffset();
        $endOffset   = $srcToken->getEndOffset();

        $result = new Zend_Search_Lucene_Analysis_Token($loweredText, $startOffset, $endOffset);
        $result->setPositionIncrement($srcToken->getPositionIncrement());

        return $result;
    }
}
|
||||
|
69
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php
vendored
Normal file
69
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/ShortWords.php
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: ShortWords.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
|
||||
/**
 * Token filter that removes short words. The minimum length a word must have
 * to pass is configured through the constructor.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_TokenFilter_ShortWords extends Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Minimum allowed term length
     *
     * @var integer
     */
    private $length;

    /**
     * Constructs new instance of this filter.
     *
     * @param integer $length  minimum allowed length of term which passes this filter (default 2)
     */
    public function __construct($length = 2)
    {
        $this->length = $length;
    }

    /**
     * Pass the token through unchanged, or remove it (return null) when its
     * term text is shorter than the configured minimum.
     *
     * The length is measured with strlen().
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken  Token to check
     * @return Zend_Search_Lucene_Analysis_Token  The same token, or null if it is too short
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
    {
        $isTooShort = strlen($srcToken->getTermText()) < $this->length;

        return $isTooShort ? null : $srcToken;
    }
}
|
||||
|
101
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php
vendored
Normal file
101
thirdparty/Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: StopWords.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
/**
 * Token filter that removes stop words. The words must be provided as a plain
 * list (the words are the array values), example:
 * $stopwords = array('the', 'an');
 *
 * The constructor flips this list into a word => index set, so an array that
 * carries the words as keys will not work.
 *
 * We do recommend to provide all words in lowercase and concatenate this class after the lowercase filter.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_TokenFilter_StopWords extends Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Stop words set: the words are the array keys.
     *
     * @var array
     */
    private $_stopSet;

    /**
     * Constructs new instance of this filter.
     *
     * @param array $stopwords  list of words (as array values) that will be filtered out
     */
    public function __construct($stopwords = array())
    {
        // Flip so that membership checks in normalize() are key lookups.
        $this->_stopSet = array_flip($stopwords);
    }

    /**
     * Pass the token through unchanged, or remove it (return null) when its
     * term text is a stop word.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken  Token to check
     * @return Zend_Search_Lucene_Analysis_Token  The same token, or null if it is a stop word
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
    {
        if (array_key_exists($srcToken->getTermText(), $this->_stopSet)) {
            return null;
        }

        return $srcToken;
    }

    /**
     * Fills stopwords set from a text file. Each line contains one stopword, lines with '#' in the first
     * column are ignored (as comments).
     *
     * You can call this method one or more times. New stopwords are always added to current set.
     *
     * @param string $filepath  full path for text file with stopwords
     * @throws Zend_Search_Lucene_Exception  When the file doesn't exist, cannot be opened or cannot be closed.
     */
    public function loadFromFile($filepath = null)
    {
        if (! $filepath || ! file_exists($filepath)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('You have to provide valid file path');
        }

        $fd = fopen($filepath, "r");
        if (! $fd) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Cannot open file ' . $filepath);
        }

        // Loop on the fgets() result rather than on feof(): when a read fails
        // (e.g. $filepath is a directory) feof() may never become true, which
        // would make a "while (!feof())" loop spin forever.
        while (($line = fgets($fd)) !== false) {
            $word = trim($line);
            if ($word !== '' && $word[0] != '#') {
                $this->_stopSet[$word] = 1;
            }
        }

        if (!fclose($fd)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Cannot close file ' . $filepath);
        }
    }
}
|
||||
|
131
thirdparty/Zend/Search/Lucene/Document.php
vendored
Normal file
131
thirdparty/Zend/Search/Lucene/Document.php
vendored
Normal file
@ -0,0 +1,131 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Document.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Field */
|
||||
require_once 'Zend/Search/Lucene/Field.php';
|
||||
|
||||
|
||||
/**
 * A Document is a set of fields. Each field has a name and a textual value.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document
{
    /**
     * Zend_Search_Lucene_Field objects of this document, keyed by field name.
     *
     * @var array
     */
    protected $_fields = array();

    /**
     * Boost factor
     * It's not stored directly in the index, but affects the normalization factor
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Magic property access: $doc->name is a shortcut for
     * $doc->getFieldValue('name').
     *
     * @param string $offset  Field name
     * @return string
     */
    public function __get($offset)
    {
        return $this->getFieldValue($offset);
    }

    /**
     * Add a field object to this document.
     *
     * The field is stored under its name, so a field added later with the
     * same name replaces the earlier one.
     *
     * @param Zend_Search_Lucene_Field $field
     * @return Zend_Search_Lucene_Document  This document, to allow call chaining
     */
    public function addField(Zend_Search_Lucene_Field $field)
    {
        $fieldName = $field->name;
        $this->_fields[$fieldName] = $field;

        return $this;
    }

    /**
     * Return an array with the names of the fields in this document.
     *
     * @return array
     */
    public function getFieldNames()
    {
        $names = array_keys($this->_fields);

        return $names;
    }

    /**
     * Returns Zend_Search_Lucene_Field object for a named field in this document.
     *
     * @param string $fieldName
     * @return Zend_Search_Lucene_Field
     * @throws Zend_Search_Lucene_Exception  When no field with that name exists
     */
    public function getField($fieldName)
    {
        if (array_key_exists($fieldName, $this->_fields)) {
            return $this->_fields[$fieldName];
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Field name \"$fieldName\" not found in document.");
    }

    /**
     * Returns the string value of a named field in this document.
     *
     * @see __get()
     * @param string $fieldName
     * @return string
     */
    public function getFieldValue($fieldName)
    {
        $field = $this->getField($fieldName);

        return $field->value;
    }

    /**
     * Returns the string value of a named field in UTF-8 encoding.
     *
     * @see __get()
     * @param string $fieldName
     * @return string
     */
    public function getFieldUtf8Value($fieldName)
    {
        $field = $this->getField($fieldName);

        return $field->getUtf8Value();
    }
}
|
151
thirdparty/Zend/Search/Lucene/Document/Docx.php
vendored
Normal file
151
thirdparty/Zend/Search/Lucene/Document/Docx.php
vendored
Normal file
@ -0,0 +1,151 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Docx.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
/**
 * Docx document.
 *
 * Reads the text and core properties of a .docx (OpenXML) package and exposes
 * them as document fields ('filename', 'body', the meta data properties and
 * 'title').
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document_Docx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - WordprocessingML
     *
     * @var string
     */
    const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';

    /**
     * Object constructor
     *
     * @param string  $fileName      Path of the .docx file
     * @param boolean $storeContent  If true the body is added as a Text field, otherwise as UnStored
     * @throws Zend_Search_Lucene_Exception  When the Zip extension is missing, or the file is not a
     *                                       readable .docx package
     */
    private function __construct($fileName, $storeContent)
    {
        if (!class_exists('ZipArchive', false)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
        }

        // Document data holder
        $documentBody = array();

        // Open OpenXML package; open() returns true on success and an error code otherwise
        $package = new ZipArchive();
        if ($package->open($fileName) !== true) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
        }

        // Read relations and search for officeDocument
        $relationsXml = $package->getFromName('_rels/.rels');
        $relations    = ($relationsXml === false) ? false : simplexml_load_string($relationsXml);
        if ($relations === false) {
            // Do not leave the archive handle open when bailing out
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
        }

        foreach ($relations->Relationship as $rel) {
            if ($rel['Type'] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                continue;
            }

            // Found office document! Read in contents...
            $target  = (string) $rel['Target'];
            $partXml = $package->getFromName(
                $this->absoluteZipPath(dirname($target) . '/' . basename($target))
            );
            $contents = ($partXml === false) ? false : simplexml_load_string($partXml);
            if ($contents === false) {
                // The referenced document part is missing or is not well-formed XML
                $package->close();
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .docx file.');
            }

            $contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
            $paragraphs = $contents->xpath('//w:body/w:p');
            if (!is_array($paragraphs)) {
                // XPath evaluation failed; treat the body as having no paragraphs
                $paragraphs = array();
            }

            foreach ($paragraphs as $paragraph) {
                $runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');

                if (!is_array($runs)) {
                    // Paragraph doesn't contain any text or breaks
                    continue;
                }

                foreach ($runs as $run) {
                    if ($run->getName() == 'br') {
                        // Break element
                        $documentBody[] = ' ';
                    } else {
                        $documentBody[] = (string)$run;
                    }
                }

                // Add space after each paragraph. So they are not bound together.
                $documentBody[] = ' ';
            }

            // Only the first office document relationship is processed
            break;
        }

        // Read core properties
        $coreProperties = $this->extractMetaData($package);

        // Close file
        $package->close();

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', implode('', $documentBody), 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (! isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Load Docx document from a file
     *
     * @param string  $fileName
     * @param boolean $storeContent
     * @return Zend_Search_Lucene_Document_Docx
     * @throws Zend_Search_Lucene_Document_Exception  When the file is not readable
     */
    public static function loadDocxFile($fileName, $storeContent = false)
    {
        if (!is_readable($fileName)) {
            require_once 'Zend/Search/Lucene/Document/Exception.php';
            throw new Zend_Search_Lucene_Document_Exception('Provided file \'' . $fileName . '\' is not readable.');
        }

        return new Zend_Search_Lucene_Document_Docx($fileName, $storeContent);
    }
}
|
37
thirdparty/Zend/Search/Lucene/Document/Exception.php
vendored
Normal file
37
thirdparty/Zend/Search/Lucene/Document/Exception.php
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Exception.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
 * Exception class for Zend_Search_Lucene documents; it adds nothing to
 * Zend_Search_Lucene_Exception beyond a distinct type.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document_Exception extends Zend_Search_Lucene_Exception
{
}
|
||||
|
481
thirdparty/Zend/Search/Lucene/Document/Html.php
vendored
Normal file
481
thirdparty/Zend/Search/Lucene/Document/Html.php
vendored
Normal file
@ -0,0 +1,481 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Html.php 21946 2010-04-19 08:21:02Z alexander $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Document */
|
||||
require_once 'Zend/Search/Lucene/Document.php';
|
||||
|
||||
|
||||
/**
|
||||
* HTML document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
{
|
||||
/**
|
||||
* List of document links
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_links = array();
|
||||
|
||||
/**
|
||||
* List of document header links
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_headerLinks = array();
|
||||
|
||||
/**
|
||||
* Stored DOM representation
|
||||
*
|
||||
* @var DOMDocument
|
||||
*/
|
||||
private $_doc;
|
||||
|
||||
/**
|
||||
* Exclude nofollow links flag
|
||||
*
|
||||
* If true then links with rel='nofollow' attribute are not included into
|
||||
* document links.
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private static $_excludeNoFollowLinks = false;
|
||||
|
||||
/**
|
||||
*
|
||||
* List of inline tags
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_inlineTags = array('a', 'abbr', 'acronym', 'dfn', 'em', 'strong', 'code',
|
||||
'samp', 'kbd', 'var', 'b', 'i', 'big', 'small', 'strike',
|
||||
'tt', 'u', 'font', 'span', 'bdo', 'cite', 'del', 'ins',
|
||||
'q', 'sub', 'sup');
|
||||
|
||||
/**
 * Object constructor
 *
 * Parses the HTML into a DOMDocument, adds the 'title' field, one field per
 * named meta tag and the 'body' field, and collects the document links and
 * header links.
 *
 * @param string  $data            HTML string (full document or fragment), or a file path when $isFile is true
 * @param boolean $isFile          If true, $data is read with file_get_contents()
 * @param boolean $storeContent    If true the body is added as a Text field, otherwise as UnStored
 * @param string  $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
 */
private function __construct($data, $isFile, $storeContent, $defaultEncoding = '')
{
    $this->_doc = new DOMDocument();
    $this->_doc->substituteEntities = true;

    $htmlData = $isFile ? file_get_contents($data) : $data;
    @$this->_doc->loadHTML($htmlData);

    if ($this->_doc->encoding === null) {
        // Document encoding is not recognized

        /** @todo improve HTML vs HTML fragment recognition */
        $isFullDocument = preg_match('/<html>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE);
        if (!$isFullDocument) {
            // It's an HTML fragment
            @$this->_doc->loadHTML('<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                                 . iconv($defaultEncoding, 'UTF-8//IGNORE', $htmlData)
                                 . '</body></html>');
        } else {
            // It's an HTML document
            // Add additional HEAD section and recognize document
            $htmlTagOffset = $matches[0][1] + strlen($matches[0][0]);

            @$this->_doc->loadHTML(iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, 0, $htmlTagOffset))
                                 . '<head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head>'
                                 . iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, $htmlTagOffset)));

            // Remove additional HEAD section
            $headXpath = new DOMXPath($this->_doc);
            $extraHead = $headXpath->query('/html/head')->item(0);
            $extraHead->parentNode->removeChild($extraHead);
        }
    }
    /** @todo Add correction of wrong HTML encoding recognition processing
     * The case is:
     * Content-type HTTP-EQUIV meta tag is presented, but ISO-8859-5 encoding is actually used,
     * even $this->_doc->encoding demonstrates another recognized encoding
     */

    $xpath = new DOMXPath($this->_doc);

    // Title: there should be only one title node, but every matched node is concatenated
    $docTitle = '';
    foreach ($xpath->query('/html/head/title') as $titleNode) {
        $docTitle .= $titleNode->nodeValue . ' ';
    }
    $this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, 'UTF-8'));

    // One field per named meta tag
    foreach ($xpath->query('/html/head/meta[@name]') as $metaNode) {
        $metaName    = $metaNode->getAttribute('name');
        $metaContent = $metaNode->getAttribute('content');
        $this->addField(Zend_Search_Lucene_Field::Text($metaName, $metaContent, 'UTF-8'));
    }

    // Body: there should be only one body node, but every matched node is processed
    $docBody = '';
    foreach ($xpath->query('/html/body') as $bodyNode) {
        $this->_retrieveNodeText($bodyNode, $docBody);
    }
    $bodyField = $storeContent
               ? Zend_Search_Lucene_Field::Text('body', $docBody, 'UTF-8')
               : Zend_Search_Lucene_Field::UnStored('body', $docBody, 'UTF-8');
    $this->addField($bodyField);

    // Document links: href of every <a>, then every <area>
    foreach (array('a', 'area') as $linkTag) {
        foreach ($this->_doc->getElementsByTagName($linkTag) as $linkNode) {
            $href = $linkNode->getAttribute('href');
            if ($href == '') {
                continue;
            }
            if (self::$_excludeNoFollowLinks && strtolower($linkNode->getAttribute('rel')) == 'nofollow') {
                continue;
            }
            $this->_links[] = $href;
        }
    }
    $this->_links = array_unique($this->_links);

    // Header links: href of every <link> in the document head
    foreach ($xpath->query('/html/head/link') as $headerLinkNode) {
        $href = $headerLinkNode->getAttribute('href');
        if ($href != '') {
            $this->_headerLinks[] = $href;
        }
    }
    $this->_headerLinks = array_unique($this->_headerLinks);
}
|
||||
|
||||
/**
 * Exclude-nofollow-links flag setter.
 *
 * The flag is static, so it applies to every HTML document created afterwards.
 *
 * @param boolean $newValue
 */
public static function setExcludeNoFollowLinks($newValue)
{
    self::$_excludeNoFollowLinks = $newValue;
}
|
||||
|
||||
/**
 * Exclude-nofollow-links flag getter.
 *
 * @return boolean  Current value of the static flag
 */
public static function getExcludeNoFollowLinks()
{
    return self::$_excludeNoFollowLinks;
}
|
||||
|
||||
/**
|
||||
* Get node text
|
||||
*
|
||||
* We should exclude scripts, which may be not included into comment tags, CDATA sections,
|
||||
*
|
||||
* @param DOMNode $node
|
||||
* @param string &$text
|
||||
*/
|
||||
private function _retrieveNodeText(DOMNode $node, &$text)
{
    if ($node->nodeType == XML_TEXT_NODE) {
        $text .= $node->nodeValue;

        // Text whose parent tag is not listed in $this->_inlineTags is followed
        // by a space, so words from neighbouring elements are not glued together.
        $parentTag = $node->parentNode->tagName;
        if (!in_array($parentTag, $this->_inlineTags)) {
            $text .= ' ';
        }

        return;
    }

    // Only element nodes are descended into; <script> content is never indexed.
    if ($node->nodeType != XML_ELEMENT_NODE || $node->nodeName == 'script') {
        return;
    }

    foreach ($node->childNodes as $descendant) {
        $this->_retrieveNodeText($descendant, $text);
    }
}
|
||||
|
||||
/**
|
||||
* Get document HREF links
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getLinks()
{
    // HREF values gathered from the document's <a> and <area> elements
    $links = $this->_links;

    return $links;
}
|
||||
|
||||
/**
|
||||
* Get document header links
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getHeaderLinks()
{
    // HREF values gathered from the <link> elements in the document head
    $headerLinks = $this->_headerLinks;

    return $headerLinks;
}
|
||||
|
||||
/**
|
||||
* Load HTML document from a string
|
||||
*
|
||||
* @param string $data
|
||||
* @param boolean $storeContent
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
* @return Zend_Search_Lucene_Document_Html
|
||||
*/
|
||||
public static function loadHTML($data, $storeContent = false, $defaultEncoding = '')
{
    // The second constructor argument is false here and true in loadHTMLFile():
    // $data is handed over as the HTML source itself.
    $document = new Zend_Search_Lucene_Document_Html($data, false, $storeContent, $defaultEncoding);

    return $document;
}
|
||||
|
||||
/**
|
||||
* Load HTML document from a file
|
||||
*
|
||||
* @param string $file
|
||||
* @param boolean $storeContent
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
* @return Zend_Search_Lucene_Document_Html
|
||||
*/
|
||||
public static function loadHTMLFile($file, $storeContent = false, $defaultEncoding = '')
{
    // The second constructor argument is true here and false in loadHTML():
    // $file is handed over as a file reference rather than as HTML source.
    $document = new Zend_Search_Lucene_Document_Html($file, true, $storeContent, $defaultEncoding);

    return $document;
}
|
||||
|
||||
|
||||
/**
|
||||
* Highlight text in text node
|
||||
*
|
||||
* @param DOMText $node
|
||||
* @param array $wordsToHighlight
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
{
    /** Zend_Search_Lucene_Analysis_Analyzer */
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

    // Tokenize the node text with the default analyzer and keep only the
    // tokens whose term text is a key of $wordsToHighlight.
    $tokenizer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokenizer->setInput($node->nodeValue, 'UTF-8');

    $hits = array();
    while (true) {
        $token = $tokenizer->nextToken();
        if ($token === null) {
            break;
        }
        if (isset($wordsToHighlight[$token->getTermText()])) {
            $hits[] = $token;
        }
    }

    if (count($hits) == 0) {
        return;
    }

    // Process the hits from last to first: each split only touches the tail
    // of the text, so the offsets of the earlier hits stay valid.
    for ($index = count($hits) - 1; $index >= 0; $index--) {
        $hit = $hits[$index];

        // Split off the text following the hit, then the hit itself
        $node->splitText($hit->getEndOffset());
        $matchedWordNode = $node->splitText($hit->getStartOffset());

        // The callback receives the matched text as its first argument,
        // followed by the caller-supplied extra parameters.
        $callbackArguments = array_merge(array($matchedWordNode->nodeValue), $params);
        $highlightedWordNodeSetHtml = call_user_func_array($callback, $callbackArguments);

        // Parse the returned markup inside a UTF-8 wrapper document;
        // loadHTML() turns it into a DOM tree.
        $fragmentDocument = new DOMDocument('1.0', 'UTF-8');
        $loaded = @$fragmentDocument->
                        loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
                               . $highlightedWordNodeSetHtml
                               . '</body></html>');
        if (!$loaded) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '$highlightedWordNodeSetHtml'.");
        }

        $fragmentXpath = new DOMXPath($fragmentDocument);
        $fragmentNodes = $fragmentXpath->query('/html/body')->item(0)->childNodes;

        // Copy every parsed node into this document in front of the matched
        // word, then drop the plain-text original.
        foreach ($fragmentNodes as $fragmentNode) {
            $node->parentNode->insertBefore(
                $this->_doc->importNode($fragmentNode, true /* deep copy */),
                $matchedWordNode
            );
        }

        $node->parentNode->removeChild($matchedWordNode);
    }
}
|
||||
|
||||
|
||||
/**
|
||||
* highlight words in content of the specified node
|
||||
*
|
||||
* @param DOMNode $contextNode
|
||||
* @param array $wordsToHighlight
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
|
||||
*/
|
||||
protected function _highlightNodeRecursive(DOMNode $contextNode, $wordsToHighlight, $callback, $params)
{
    if (!$contextNode->hasChildNodes()) {
        return;
    }

    // Text children are only collected during the walk and highlighted
    // afterwards: highlighting splits and replaces nodes, which would alter
    // the childNodes list while it is being iterated.
    $pendingTextNodes = array();

    foreach ($contextNode->childNodes as $child) {
        if ($child->nodeType == XML_TEXT_NODE) {
            $pendingTextNodes[] = $child;
            continue;
        }

        // Script content is never highlighted
        if ($child->nodeName == 'script') {
            continue;
        }

        $this->_highlightNodeRecursive($child, $wordsToHighlight, $callback, $params);
    }

    foreach ($pendingTextNodes as $pendingTextNode) {
        $this->_highlightTextNode($pendingTextNode, $wordsToHighlight, $callback, $params);
    }
}
|
||||
|
||||
/**
|
||||
* Standard callback method used to highlight words.
|
||||
*
|
||||
* @param string $stringToHighlight
|
||||
* @return string
|
||||
* @internal
|
||||
*/
|
||||
public function applyColour($stringToHighlight, $colour)
{
    // Wrap the text in a <b> element: black text on the requested background colour
    $openingTag = '<b style="color:black;background-color:' . $colour . '">';

    return $openingTag . $stringToHighlight . '</b>';
}
|
||||
|
||||
/**
|
||||
* Highlight text with specified color
|
||||
*
|
||||
* @param string|array $words
|
||||
* @param string $colour
|
||||
* @return string
|
||||
*/
|
||||
public function highlight($words, $colour = '#66ffff')
{
    // Delegate to highlightExtended(), using applyColour() as the callback
    // and the colour as its single extra argument.
    $colourCallback = array($this, 'applyColour');
    $callbackParams = array($colour);

    return $this->highlightExtended($words, $colourCallback, $callbackParams);
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Highlight text using specified View helper or callback function.
|
||||
*
|
||||
* @param string|array $words Words to highlight. Words could be organized using the array or string.
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters passed through into it
|
||||
* (first non-optional parameter is an HTML fragment for highlighting)
|
||||
* @return string
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function highlightExtended($words, $callback, $params = array())
{
    /** Zend_Search_Lucene_Analysis_Analyzer */
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

    if (!is_array($words)) {
        $words = array($words);
    }

    // Tokenize every word string with the default analyzer and collect the
    // tokens in one flat list. Merging step by step (rather than a single
    // variadic array_merge call) means an empty $words array never results
    // in array_merge() being invoked without arguments.
    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $wordsToHighlight = array();
    foreach ($words as $wordString) {
        $wordsToHighlight = array_merge($wordsToHighlight, $analyzer->tokenize($wordString));
    }

    // Nothing to highlight: return the document unchanged
    if (count($wordsToHighlight) == 0) {
        return $this->_doc->saveHTML();
    }

    // Key the lookup table by term text so text-node tokens can be matched with isset()
    $wordsToHighlightFlipped = array();
    foreach ($wordsToHighlight as $id => $token) {
        $wordsToHighlightFlipped[$token->getTermText()] = $id;
    }

    if (!is_callable($callback)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$viewHelper parameter mast be a View Helper name, View Helper object or callback.');
    }

    $xpath = new DOMXPath($this->_doc);

    // Only the content of <body> is highlighted
    $matchedNodes = $xpath->query("/html/body");
    foreach ($matchedNodes as $matchedNode) {
        $this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
    }

    // Return the highlighted document, as documented (@return string) and as
    // the no-words branch above already does. Previously execution fell off
    // the end here and callers received null.
    return $this->_doc->saveHTML();
}
|
||||
|
||||
|
||||
/**
|
||||
* Get HTML
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function getHTML()
{
    // Serialize the whole underlying DOM document as HTML
    $markup = $this->_doc->saveHTML();

    return $markup;
}
|
||||
|
||||
/**
|
||||
* Get HTML body
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function getHtmlBody()
{
    $xpath = new DOMXPath($this->_doc);
    $body = $xpath->query('/html/body')->item(0);

    // item(0) is null when the document has no <body>; return an empty
    // string instead of reading childNodes from null.
    if ($body === null) {
        return '';
    }

    // Serialize each direct child of <body> with saveXML() and concatenate the pieces
    $outputFragments = array();
    foreach ($body->childNodes as $bodyNode) {
        $outputFragments[] = $this->_doc->saveXML($bodyNode);
    }

    return implode('', $outputFragments);
}
|
||||
}
|
||||
|
129
thirdparty/Zend/Search/Lucene/Document/OpenXml.php
vendored
Normal file
129
thirdparty/Zend/Search/Lucene/Document/OpenXml.php
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: OpenXml.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Document */
|
||||
require_once 'Zend/Search/Lucene/Document.php';
|
||||
|
||||
|
||||
/**
|
||||
* OpenXML document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
{
    /**
     * Xml Schema - Relationships
     *
     * @var string
     */
    const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

    /**
     * Xml Schema - Office document
     *
     * @var string
     */
    const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Xml Schema - Core properties (relationship type used in _rels/.rels)
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

    /**
     * Xml Schema - Core properties (XML namespace of the cp: elements inside the core properties part)
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

    /**
     * Xml Schema - Dublin Core
     *
     * @var string
     */
    const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

    /**
     * Xml Schema - Dublin Core Terms
     *
     * @var string
     */
    const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

    /**
     * Extract metadata from document
     *
     * Looks up the core-properties relationship in _rels/.rels, reads the part
     * it points to and returns the element name => text value of its Dublin Core,
     * core-properties and Dublin Core Terms children. A missing or unparsable
     * part simply contributes no properties.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @return array    Key-value pairs containing document meta data
     */
    protected function extractMetaData(ZipArchive $package)
    {
        // Data holders
        $coreProperties = array();

        // Read relations and search for core properties
        $relationsXml = $package->getFromName("_rels/.rels");
        if ($relationsXml === false) {
            return $coreProperties;
        }
        $relations = simplexml_load_string($relationsXml);
        if ($relations === false) {
            return $coreProperties;
        }

        // Namespaces whose child elements are collected, in this order.
        // SCHEMA_COREPROPERTIES is the relationship type, not the namespace of
        // the cp: elements; it is still queried so existing behaviour is kept,
        // while SCHEMA_COREPROPERTIES_NAMESPACE is the one that matches them.
        $namespaces = array(
            self::SCHEMA_DUBLINCORE,
            self::SCHEMA_COREPROPERTIES,
            self::SCHEMA_COREPROPERTIES_NAMESPACE,
            self::SCHEMA_DUBLINCORETERMS,
        );

        foreach ($relations->Relationship as $rel) {
            if ($rel["Type"] != self::SCHEMA_COREPROPERTIES) {
                continue;
            }

            // Found core properties! Normalise the target like the concrete
            // document classes do: a bare "core.xml" or a leading "/" would
            // otherwise yield a name that does not exist in the archive.
            $partXml = $package->getFromName($this->absoluteZipPath((string)$rel["Target"]));
            if ($partXml === false) {
                continue;
            }
            $contents = simplexml_load_string($partXml);
            if ($contents === false) {
                continue;
            }

            foreach ($namespaces as $namespace) {
                foreach ($contents->children($namespace) as $child) {
                    $coreProperties[$child->getName()] = (string)$child;
                }
            }
        }

        return $coreProperties;
    }

    /**
     * Determine absolute zip path
     *
     * Accepts "/" and "\" as separators, drops empty and "." segments,
     * resolves ".." against the preceding segment and joins the result
     * with "/" (no leading slash).
     *
     * @param string $path
     * @return string
     */
    protected function absoluteZipPath($path) {
        $segments = explode('/', str_replace('\\', '/', $path));

        $absolutes = array();
        foreach ($segments as $segment) {
            if ($segment === '' || $segment === '.') {
                continue;
            }
            if ($segment === '..') {
                array_pop($absolutes);
            } else {
                $absolutes[] = $segment;
            }
        }

        return implode('/', $absolutes);
    }
}
|
200
thirdparty/Zend/Search/Lucene/Document/Pptx.php
vendored
Normal file
200
thirdparty/Zend/Search/Lucene/Document/Pptx.php
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Pptx.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
/**
|
||||
* Pptx document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - PresentationML
     *
     * @var string
     */
    const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

    /**
     * Xml Schema - DrawingML
     *
     * @var string
     */
    const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

    /**
     * Xml Schema - Slide relation
     *
     * @var string
     */
    const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

    /**
     * Xml Schema - Slide notes relation
     *
     * @var string
     */
    const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

    /**
     * Object constructor
     *
     * Opens the package, collects the text of every slide (and of its notes,
     * when present) in slide-key order, and adds the 'filename', 'body',
     * meta data and 'title' fields to the document.
     *
     * @param string  $fileName
     * @param boolean $storeContent
     * @throws Zend_Search_Lucene_Exception
     */
    private function __construct($fileName, $storeContent)
    {
        if (!class_exists('ZipArchive', false)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
        }

        // Document data holders
        $slides       = array();
        $slideNotes   = array();
        $documentBody = array();

        // Open OpenXML package; open() returns true only on success
        $package = new ZipArchive();
        if ($package->open($fileName) !== true) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
        }

        // Read relations and search for officeDocument
        $relationsXml = $package->getFromName('_rels/.rels');
        if ($relationsXml === false) {
            // Release the archive handle before reporting the failure
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
        }

        $relations = simplexml_load_string($relationsXml);
        if ($relations !== false) {
            foreach ($relations->Relationship as $rel) {
                if ($rel["Type"] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                    continue;
                }

                // Found office document! Only the first one is processed.
                list($slides, $slideNotes) = $this->_readSlides($package, (string)$rel["Target"]);
                break;
            }
        }

        // Sort slides
        ksort($slides);
        ksort($slideNotes);

        // Extract contents from slides, each followed by its notes
        foreach ($slides as $slideKey => $slide) {
            $documentBody = array_merge($documentBody, $this->_extractText($slide));

            if (isset($slideNotes[$slideKey])) {
                $documentBody = array_merge($documentBody, $this->_extractText($slideNotes[$slideKey]));
            }
        }

        // Read core properties
        $coreProperties = $this->extractMetaData($package);

        // Close file
        $package->close();

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        $bodyText = implode(' ', $documentBody);
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (!isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Read one XML part of the package
     *
     * @param ZipArchive $package
     * @param string     $path  Part path; normalised with absoluteZipPath()
     * @return SimpleXMLElement|false  false when the part is missing or cannot be parsed
     */
    private function _readXmlPart(ZipArchive $package, $path)
    {
        $xml = $package->getFromName($this->absoluteZipPath($path));
        if ($xml === false) {
            return false;
        }

        return simplexml_load_string($xml);
    }

    /**
     * Load every slide referenced by the presentation part, plus its notes
     *
     * Both result arrays are keyed by the slide relationship id with the
     * 'rId' prefix removed. Slides or notes that cannot be read are left out.
     *
     * @param ZipArchive $package
     * @param string     $presentationTarget  Target of the officeDocument relationship
     * @return array  array($slides, $slideNotes)
     */
    private function _readSlides(ZipArchive $package, $presentationTarget)
    {
        $slides     = array();
        $slideNotes = array();

        $baseDir = dirname($presentationTarget);
        $slideRelations = $this->_readXmlPart(
            $package,
            $baseDir . "/_rels/" . basename($presentationTarget) . ".rels"
        );
        if ($slideRelations === false) {
            return array($slides, $slideNotes);
        }

        foreach ($slideRelations->Relationship as $slideRel) {
            if ($slideRel["Type"] != self::SCHEMA_SLIDERELATION) {
                continue;
            }

            // Found slide!
            $slideTarget = (string)$slideRel["Target"];
            $slideDir    = $baseDir . "/" . dirname($slideTarget);
            $slideKey    = str_replace('rId', '', (string)$slideRel["Id"]);

            $slide = $this->_readXmlPart($package, $slideDir . "/" . basename($slideTarget));
            if ($slide === false) {
                // Unreadable slide: nothing to index, and its notes would never be used
                continue;
            }
            $slides[$slideKey] = $slide;

            // Search for slide notes
            $slideNotesRelations = $this->_readXmlPart(
                $package,
                $slideDir . "/_rels/" . basename($slideTarget) . ".rels"
            );
            if ($slideNotesRelations === false) {
                continue;
            }

            foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                if ($slideNoteRel["Type"] != self::SCHEMA_SLIDENOTESRELATION) {
                    continue;
                }

                // Found slide notes! They are stored under the slide's key; only the first match counts.
                $noteTarget = (string)$slideNoteRel["Target"];
                $note = $this->_readXmlPart(
                    $package,
                    $slideDir . "/" . dirname($noteTarget) . "/" . basename($noteTarget)
                );
                if ($note !== false) {
                    $slideNotes[$slideKey] = $note;
                }
                break;
            }
        }

        return array($slides, $slideNotes);
    }

    /**
     * Collect the text of all DrawingML a:t elements of a slide or notes part
     *
     * @param SimpleXMLElement $part
     * @return array  Text runs in document order
     */
    private function _extractText(SimpleXMLElement $part)
    {
        // Register namespaces
        $part->registerXPathNamespace("p", self::SCHEMA_PRESENTATIONML);
        $part->registerXPathNamespace("a", self::SCHEMA_DRAWINGML);

        // Fetch all text
        $textElements = $part->xpath('//a:t');
        if (!is_array($textElements)) {
            return array();
        }

        $texts = array();
        foreach ($textElements as $textElement) {
            $texts[] = (string)$textElement;
        }

        return $texts;
    }

    /**
     * Load Pptx document from a file
     *
     * @param string  $fileName
     * @param boolean $storeContent
     * @return Zend_Search_Lucene_Document_Pptx
     */
    public static function loadPptxFile($fileName, $storeContent = false)
    {
        return new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
    }
}
|
263
thirdparty/Zend/Search/Lucene/Document/Xlsx.php
vendored
Normal file
263
thirdparty/Zend/Search/Lucene/Document/Xlsx.php
vendored
Normal file
@ -0,0 +1,263 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Xlsx.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
/**
|
||||
* Xlsx document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - SpreadsheetML
     *
     * @var string
     */
    const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

    /**
     * Xml Schema - DrawingML
     *
     * @var string
     */
    const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

    /**
     * Xml Schema - Shared Strings
     *
     * @var string
     */
    const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';

    /**
     * Xml Schema - Worksheet relation
     *
     * @var string
     */
    const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

    /**
     * Xml Schema - Slide notes relation
     *
     * @var string
     */
    const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

    /**
     * Object constructor
     *
     * Opens the package, loads the shared string table and the worksheets,
     * collects every cell value in worksheet-key order, and adds the
     * 'filename', 'body', meta data and 'title' fields to the document.
     *
     * @param string  $fileName
     * @param boolean $storeContent
     * @throws Zend_Search_Lucene_Exception
     */
    private function __construct($fileName, $storeContent)
    {
        if (!class_exists('ZipArchive', false)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
        }

        // Document data holders
        $sharedStrings = array();
        $worksheets    = array();
        $documentBody  = array();

        // Open OpenXML package; open() returns true only on success
        $package = new ZipArchive();
        if ($package->open($fileName) !== true) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
        }

        // Read relations and search for officeDocument
        $relationsXml = $package->getFromName('_rels/.rels');
        if ($relationsXml === false) {
            // Release the archive handle before reporting the failure
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .xlsx file.');
        }

        $relations = simplexml_load_string($relationsXml);
        if ($relations !== false) {
            foreach ($relations->Relationship as $rel) {
                if ($rel["Type"] != Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                    continue;
                }

                // Found office document! Read relations for workbook...
                $workbookTarget = (string)$rel["Target"];
                $workbookDir    = dirname($workbookTarget);
                $workbookRelations = $this->_readXmlPart(
                    $package,
                    $workbookDir . "/_rels/" . basename($workbookTarget) . ".rels"
                );
                if ($workbookRelations === false) {
                    // No readable workbook relations: there is nothing to load
                    break;
                }
                $workbookRelations->registerXPathNamespace("rel", Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);

                // Read shared strings (a workbook may legitimately have none)
                $sharedStringsRel = $workbookRelations->xpath("rel:Relationship[@Type='" . self::SCHEMA_SHAREDSTRINGS . "']");
                if (is_array($sharedStringsRel) && isset($sharedStringsRel[0])) {
                    $xmlStrings = $this->_readXmlPart(
                        $package,
                        $workbookDir . "/" . (string)$sharedStringsRel[0]['Target']
                    );
                    if ($xmlStrings !== false && isset($xmlStrings->si)) {
                        foreach ($xmlStrings->si as $val) {
                            if (isset($val->t)) {
                                $sharedStrings[] = (string)$val->t;
                            } elseif (isset($val->r)) {
                                $sharedStrings[] = $this->_parseRichText($val);
                            } else {
                                // Cells refer to shared strings by position, so an entry
                                // without text must still occupy its slot; skipping it
                                // would shift every later index by one.
                                $sharedStrings[] = '';
                            }
                        }
                    }
                }

                // Loop relations for workbook and extract worksheets...
                foreach ($workbookRelations->Relationship as $workbookRelation) {
                    if ($workbookRelation["Type"] != self::SCHEMA_WORKSHEETRELATION) {
                        continue;
                    }

                    $sheetTarget = (string)$workbookRelation["Target"];
                    $worksheet = $this->_readXmlPart(
                        $package,
                        $workbookDir . "/" . dirname($sheetTarget) . "/" . basename($sheetTarget)
                    );
                    if ($worksheet !== false) {
                        $worksheets[ str_replace('rId', '', (string)$workbookRelation["Id"]) ] = $worksheet;
                    }
                }

                // Only the first officeDocument relationship is processed
                break;
            }
        }

        // Sort worksheets
        ksort($worksheets);

        // Extract contents from worksheets
        foreach ($worksheets as $worksheet) {
            foreach ($worksheet->sheetData->row as $row) {
                foreach ($row->c as $c) {
                    $documentBody[] = $this->_readCellValue($c, $sharedStrings);
                }
            }
        }

        // Read core properties
        $coreProperties = $this->extractMetaData($package);

        // Close file
        $package->close();

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        $bodyText = implode(' ', $documentBody);
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (!isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Read one XML part of the package
     *
     * @param ZipArchive $package
     * @param string     $path  Part path; normalised with absoluteZipPath()
     * @return SimpleXMLElement|false  false when the part is missing or cannot be parsed
     */
    private function _readXmlPart(ZipArchive $package, $path)
    {
        $xml = $package->getFromName($this->absoluteZipPath($path));
        if ($xml === false) {
            return false;
        }

        return simplexml_load_string($xml);
    }

    /**
     * Convert one worksheet cell (<c> element) into the value that is indexed
     *
     * The cell's "t" attribute selects the interpretation: shared string,
     * boolean, inline rich text, error text, or (default) the raw value with
     * numeric strings converted to int/float.
     *
     * @param SimpleXMLElement $c
     * @param array            $sharedStrings  Shared string table, indexed by position
     * @return mixed
     */
    private function _readCellValue($c, array $sharedStrings)
    {
        // Determine data type
        $dataType = (string)$c["t"];

        switch ($dataType) {
            case "s":
                // Value is a shared string: <v> holds its index in the table
                $value = '';
                if ((string)$c->v != '') {
                    $index = intval((string)$c->v);
                    if (isset($sharedStrings[$index])) {
                        $value = $sharedStrings[$index];
                    }
                }
                break;

            case "b":
                // Value is boolean
                $value = (string)$c->v;
                if ($value == '0') {
                    $value = false;
                } else if ($value == '1') {
                    $value = true;
                } else {
                    $value = (bool)$c->v;
                }
                break;

            case "inlineStr":
                // Value is rich text inline
                $value = $this->_parseRichText($c->is);
                break;

            case "e":
                // Value is an error message
                $value = (string)$c->v;
                break;

            default:
                // Value is a string
                $value = (string)$c->v;

                // Check for numeric values
                if (is_numeric($value)) {
                    if ($value == (int)$value) {
                        $value = (int)$value;
                    } elseif ($value == (float)$value) {
                        $value = (float)$value;
                    }
                }
        }

        return $value;
    }

    /**
     * Parse rich text XML
     *
     * Returns the text of the element's <t> child when it has one, otherwise
     * the concatenated <t> texts of its <r> runs.
     *
     * @param SimpleXMLElement $is
     * @return string
     */
    private function _parseRichText($is = null) {
        // Nothing to parse when no element was supplied
        if ($is === null) {
            return '';
        }

        if (isset($is->t)) {
            return (string)$is->t;
        }

        $value = array();
        foreach ($is->r as $run) {
            $value[] = (string)$run->t;
        }

        return implode('', $value);
    }

    /**
     * Load Xlsx document from a file
     *
     * @param string  $fileName
     * @param boolean $storeContent
     * @return Zend_Search_Lucene_Document_Xlsx
     */
    public static function loadXlsxFile($fileName, $storeContent = false)
    {
        return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
    }
}
|
37
thirdparty/Zend/Search/Lucene/Exception.php
vendored
Normal file
37
thirdparty/Zend/Search/Lucene/Exception.php
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Exception.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
|
||||
|
||||
/**
 * Exception type for the Zend_Search_Lucene package.
 *
 * Adds no behaviour of its own; it only specialises Zend_Search_Exception
 * so callers can catch Lucene failures separately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Exception extends Zend_Search_Exception
{
}
|
||||
|
443
thirdparty/Zend/Search/Lucene/FSM.php
vendored
Normal file
443
thirdparty/Zend/Search/Lucene/FSM.php
vendored
Normal file
@ -0,0 +1,443 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: FSM.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_FSMAction */
|
||||
require_once 'Zend/Search/Lucene/FSMAction.php';
|
||||
|
||||
/**
|
||||
* Abstract Finite State Machine
|
||||
*
|
||||
* See the Wikipedia description of finite state machines: http://en.wikipedia.org/wiki/Finite_state_machine
|
||||
*
|
||||
* Any type of transducer (Moore machine or Mealy machine) may also be implemented using this abstract FSM.
|
||||
* The process() method invokes the registered actions, which may construct the FSM output.
|
||||
* Actions may also be used to signal that the accept state has been reached.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_FSM
{
    /**
     * Machine states alphabet, keyed by the state value itself
     *
     * [state] => state
     *
     * @var array
     */
    private $_states = array();

    /**
     * Current state (null until the first state is added)
     *
     * @var integer|string|null
     */
    private $_currentState = null;

    /**
     * Input alphabet, keyed by the symbol value itself
     *
     * [symbol] => symbol
     *
     * @var array
     */
    private $_inputAphabet = array();

    /**
     * State transition table
     *
     * [sourceState][input] => targetState
     *
     * @var array
     */
    private $_rules = array();

    /**
     * List of entry actions
     * Each action executes when entering the state
     *
     * [state] => array of actions
     *
     * @var array
     */
    private $_entryActions = array();

    /**
     * List of exit actions
     * Each action executes when exiting the state
     *
     * [state] => array of actions
     *
     * @var array
     */
    private $_exitActions = array();

    /**
     * List of input actions
     * Each action executes when the given input is processed in the given state
     *
     * [state][input] => array of actions
     *
     * @var array
     */
    private $_inputActions = array();

    /**
     * List of transition actions
     * Each action executes when the machine moves from state1 to state2
     *
     * [state1][state2] => array of actions
     *
     * @var array
     */
    private $_transitionActions = array();

    /**
     * Finite State machine constructor
     *
     * $states is an array of integers or strings with a list of possible machine states.
     * The constructor treats the first list element as the start state (assigns it to
     * $_currentState). It may be reassigned by a setState() call.
     * The states list may be empty and can be extended later by addState() or addStates() calls.
     *
     * $inputAphabet is the same as $states, but represents the input alphabet.
     * It may also be extended later by addInputSymbols() or addInputSymbol() calls.
     *
     * $rules parameter describes FSM transitions and has the structure:
     * array( array(sourceState, input, targetState[, inputAction]),
     *        array(sourceState, input, targetState[, inputAction]),
     *        array(sourceState, input, targetState[, inputAction]),
     *        ...
     *      )
     * Rules can also be added later by addRules() and addRule() calls.
     *
     * FSM actions are very flexible and may be defined by addEntryAction(), addExitAction(),
     * addInputAction() and addTransitionAction() calls.
     *
     * @param array $states
     * @param array $inputAphabet
     * @param array $rules
     * @throws Zend_Search_Exception  if a rule references an unknown state or input symbol
     */
    public function __construct($states = array(), $inputAphabet = array(), $rules = array())
    {
        $this->addStates($states);
        $this->addInputSymbols($inputAphabet);
        $this->addRules($rules);
    }

    /**
     * Add states to the state machine
     *
     * @param array $states
     */
    public function addStates($states)
    {
        foreach ($states as $state) {
            $this->addState($state);
        }
    }

    /**
     * Add state to the state machine.
     * The first state ever added becomes the current state.
     *
     * @param integer|string $state
     */
    public function addState($state)
    {
        $this->_states[$state] = $state;

        if ($this->_currentState === null) {
            $this->_currentState = $state;
        }
    }

    /**
     * Set FSM state.
     * No action is invoked
     *
     * @param integer|string $state
     * @throws Zend_Search_Exception  if $state is not a known state
     */
    public function setState($state)
    {
        if (!isset($this->_states[$state])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('State \'' . $state . '\' is not on of the possible FSM states.');
        }

        $this->_currentState = $state;
    }

    /**
     * Get FSM state.
     *
     * @return integer|string|null  current state, or null if no state has been added yet
     */
    public function getState()
    {
        return $this->_currentState;
    }

    /**
     * Add symbols to the input alphabet
     *
     * @param array $inputAphabet
     */
    public function addInputSymbols($inputAphabet)
    {
        foreach ($inputAphabet as $inputSymbol) {
            $this->addInputSymbol($inputSymbol);
        }
    }

    /**
     * Add symbol to the input alphabet
     *
     * @param integer|string $inputSymbol
     */
    public function addInputSymbol($inputSymbol)
    {
        $this->_inputAphabet[$inputSymbol] = $inputSymbol;
    }

    /**
     * Add transition rules
     *
     * array structure:
     * array( array(sourceState, input, targetState[, inputAction]),
     *        array(sourceState, input, targetState[, inputAction]),
     *        array(sourceState, input, targetState[, inputAction]),
     *        ...
     *      )
     *
     * @param array $rules
     * @throws Zend_Search_Exception  see addRule()
     */
    public function addRules($rules)
    {
        foreach ($rules as $rule) {
            $this->addRule($rule[0], $rule[1], $rule[2], isset($rule[3]) ? $rule[3] : null);
        }
    }

    /**
     * Add a single transition rule
     *
     * @param integer|string $sourceState
     * @param integer|string $input
     * @param integer|string $targetState
     * @param Zend_Search_Lucene_FSMAction|null $inputAction
     * @throws Zend_Search_Exception  if a state or the input symbol is unknown, or if a rule
     *                                for the {sourceState, input} pair already exists
     */
    public function addRule($sourceState, $input, $targetState, $inputAction = null)
    {
        if (!isset($this->_states[$sourceState])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined source state (' . $sourceState . ').');
        }
        if (!isset($this->_states[$targetState])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined target state (' . $targetState . ').');
        }
        if (!isset($this->_inputAphabet[$input])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined input symbol (' . $input . ').');
        }

        if (!isset($this->_rules[$sourceState])) {
            $this->_rules[$sourceState] = array();
        }
        if (isset($this->_rules[$sourceState][$input])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Rule for {state,input} pair (' . $sourceState . ', '. $input . ') is already defined.');
        }

        $this->_rules[$sourceState][$input] = $targetState;

        if ($inputAction !== null) {
            $this->addInputAction($sourceState, $input, $inputAction);
        }
    }

    /**
     * Add state entry action.
     * Several entry actions are allowed.
     * Action execution order is defined by addEntryAction() calls
     *
     * @param integer|string $state
     * @param Zend_Search_Lucene_FSMAction $action
     * @throws Zend_Search_Exception  if $state is not a known state
     */
    public function addEntryAction($state, Zend_Search_Lucene_FSMAction $action)
    {
        if (!isset($this->_states[$state])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined state (' . $state. ').');
        }

        if (!isset($this->_entryActions[$state])) {
            $this->_entryActions[$state] = array();
        }

        $this->_entryActions[$state][] = $action;
    }

    /**
     * Add state exit action.
     * Several exit actions are allowed.
     * Action execution order is defined by addExitAction() calls
     *
     * @param integer|string $state
     * @param Zend_Search_Lucene_FSMAction $action
     * @throws Zend_Search_Exception  if $state is not a known state
     */
    public function addExitAction($state, Zend_Search_Lucene_FSMAction $action)
    {
        if (!isset($this->_states[$state])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined state (' . $state. ').');
        }

        if (!isset($this->_exitActions[$state])) {
            $this->_exitActions[$state] = array();
        }

        $this->_exitActions[$state][] = $action;
    }

    /**
     * Add input action (defined by {state, input} pair).
     * Several input actions are allowed.
     * Action execution order is defined by addInputAction() calls
     *
     * @param integer|string $state
     * @param integer|string $inputSymbol
     * @param Zend_Search_Lucene_FSMAction $action
     * @throws Zend_Search_Exception  if the state or the input symbol is unknown
     */
    public function addInputAction($state, $inputSymbol, Zend_Search_Lucene_FSMAction $action)
    {
        if (!isset($this->_states[$state])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined state (' . $state. ').');
        }
        if (!isset($this->_inputAphabet[$inputSymbol])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined input symbol (' . $inputSymbol. ').');
        }

        if (!isset($this->_inputActions[$state])) {
            $this->_inputActions[$state] = array();
        }
        if (!isset($this->_inputActions[$state][$inputSymbol])) {
            $this->_inputActions[$state][$inputSymbol] = array();
        }

        $this->_inputActions[$state][$inputSymbol][] = $action;
    }

    /**
     * Add transition action (defined by {sourceState, targetState} pair).
     * Several transition actions are allowed.
     * Action execution order is defined by addTransitionAction() calls
     *
     * @param integer|string $sourceState
     * @param integer|string $targetState
     * @param Zend_Search_Lucene_FSMAction $action
     * @throws Zend_Search_Exception  if either state is unknown
     */
    public function addTransitionAction($sourceState, $targetState, Zend_Search_Lucene_FSMAction $action)
    {
        if (!isset($this->_states[$sourceState])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('Undefined source state (' . $sourceState. ').');
        }
        if (!isset($this->_states[$targetState])) {
            require_once 'Zend/Search/Exception.php';
            // This branch validates the *target* state; the message previously said "source".
            throw new Zend_Search_Exception('Undefined target state (' . $targetState. ').');
        }

        if (!isset($this->_transitionActions[$sourceState])) {
            $this->_transitionActions[$sourceState] = array();
        }
        if (!isset($this->_transitionActions[$sourceState][$targetState])) {
            $this->_transitionActions[$sourceState][$targetState] = array();
        }

        $this->_transitionActions[$sourceState][$targetState][] = $action;
    }

    /**
     * Process an input
     *
     * Actions run in this order: exit actions of the source state (only if the
     * state changes), input actions for {source state, input}, then the current
     * state is switched, then transition actions for {source, target}, and
     * finally entry actions of the target state (only if the state changes).
     *
     * @param mixed $input
     * @throws Zend_Search_Exception  if no rule exists for the current state / input pair
     */
    public function process($input)
    {
        if (!isset($this->_rules[$this->_currentState])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('There is no any rule for current state (' . $this->_currentState . ').');
        }
        if (!isset($this->_rules[$this->_currentState][$input])) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('There is no any rule for {current state, input} pair (' . $this->_currentState . ', ' . $input . ').');
        }

        $sourceState = $this->_currentState;
        $targetState = $this->_rules[$this->_currentState][$input];

        if ($sourceState != $targetState  &&  isset($this->_exitActions[$sourceState])) {
            foreach ($this->_exitActions[$sourceState] as $action) {
                $action->doAction();
            }
        }
        if (isset($this->_inputActions[$sourceState]) &&
            isset($this->_inputActions[$sourceState][$input])) {
            foreach ($this->_inputActions[$sourceState][$input] as $action) {
                $action->doAction();
            }
        }

        $this->_currentState = $targetState;

        if (isset($this->_transitionActions[$sourceState]) &&
            isset($this->_transitionActions[$sourceState][$targetState])) {
            foreach ($this->_transitionActions[$sourceState][$targetState] as $action) {
                $action->doAction();
            }
        }
        if ($sourceState != $targetState  &&  isset($this->_entryActions[$targetState])) {
            foreach ($this->_entryActions[$targetState] as $action) {
                $action->doAction();
            }
        }
    }

    /**
     * Return the machine to its start state (the first state that was added).
     * No action is invoked.
     *
     * @throws Zend_Search_Exception  if no state has been defined
     */
    public function reset()
    {
        if (count($this->_states) == 0) {
            require_once 'Zend/Search/Exception.php';
            throw new Zend_Search_Exception('There is no any state defined for FSM.');
        }

        // $_states is keyed by state value, not by position, so index 0 only
        // exists when some state equals 0. The global reset() function returns
        // the first element regardless of its key.
        $this->_currentState = reset($this->_states);
    }
}
*
|
||||
* @var array
|
||||
*/
|
||||
private $_inputActions = array();
|
||||
|
||||
/**
|
||||
* List of input actions
|
||||
* Each action executes when entering the state
|
||||
*
|
||||
* [state1][state2] => action
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_transitionActions = array();
|
||||
|
||||
/**
|
||||
* Finite State machine constructor
|
||||
*
|
||||
* $states is an array of integers or strings with a list of possible machine states
|
||||
* constructor treats fist list element as a sturt state (assignes it to $_current state).
|
||||
* It may be reassigned by setState() call.
|
||||
* States list may be empty and can be extended later by addState() or addStates() calls.
|
||||
*
|
||||
* $inputAphabet is the same as $states, but represents input alphabet
|
||||
* it also may be extended later by addInputSymbols() or addInputSymbol() calls.
|
||||
*
|
||||
* $rules parameter describes FSM transitions and has a structure:
|
||||
* array( array(sourseState, input, targetState[, inputAction]),
|
||||
* array(sourseState, input, targetState[, inputAction]),
|
||||
* array(sourseState, input, targetState[, inputAction]),
|
||||
* ...
|
||||
* )
|
||||
* Rules also can be added later by addRules() and addRule() calls.
|
||||
*
|
||||
* FSM actions are very flexible and may be defined by addEntryAction(), addExitAction(),
|
||||
* addInputAction() and addTransitionAction() calls.
|
||||
*
|
||||
* @param array $states
|
||||
* @param array $inputAphabet
|
||||
* @param array $rules
|
||||
*/
|
||||
public function __construct($states = array(), $inputAphabet = array(), $rules = array())
|
||||
{
|
||||
$this->addStates($states);
|
||||
$this->addInputSymbols($inputAphabet);
|
||||
$this->addRules($rules);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add states to the state machine
|
||||
*
|
||||
* @param array $states
|
||||
*/
|
||||
public function addStates($states)
|
||||
{
|
||||
foreach ($states as $state) {
|
||||
$this->addState($state);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add state to the state machine
|
||||
*
|
||||
* @param integer|string $state
|
||||
*/
|
||||
public function addState($state)
|
||||
{
|
||||
$this->_states[$state] = $state;
|
||||
|
||||
if ($this->_currentState === null) {
|
||||
$this->_currentState = $state;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set FSM state.
|
||||
* No any action is invoked
|
||||
*
|
||||
* @param integer|string $state
|
||||
* @throws Zend_Search_Exception
|
||||
*/
|
||||
public function setState($state)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('State \'' . $state . '\' is not on of the possible FSM states.');
|
||||
}
|
||||
|
||||
$this->_currentState = $state;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get FSM state.
|
||||
*
|
||||
* @return integer|string $state|null
|
||||
*/
|
||||
public function getState()
|
||||
{
|
||||
return $this->_currentState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add symbols to the input alphabet
|
||||
*
|
||||
* @param array $inputAphabet
|
||||
*/
|
||||
public function addInputSymbols($inputAphabet)
|
||||
{
|
||||
foreach ($inputAphabet as $inputSymbol) {
|
||||
$this->addInputSymbol($inputSymbol);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add symbol to the input alphabet
|
||||
*
|
||||
* @param integer|string $inputSymbol
|
||||
*/
|
||||
public function addInputSymbol($inputSymbol)
|
||||
{
|
||||
$this->_inputAphabet[$inputSymbol] = $inputSymbol;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add transition rules
|
||||
*
|
||||
* array structure:
|
||||
* array( array(sourseState, input, targetState[, inputAction]),
|
||||
* array(sourseState, input, targetState[, inputAction]),
|
||||
* array(sourseState, input, targetState[, inputAction]),
|
||||
* ...
|
||||
* )
|
||||
*
|
||||
* @param array $rules
|
||||
*/
|
||||
public function addRules($rules)
|
||||
{
|
||||
foreach ($rules as $rule) {
|
||||
$this->addrule($rule[0], $rule[1], $rule[2], isset($rule[3])?$rule[3]:null);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add symbol to the input alphabet
|
||||
*
|
||||
* @param integer|string $sourceState
|
||||
* @param integer|string $input
|
||||
* @param integer|string $targetState
|
||||
* @param Zend_Search_Lucene_FSMAction|null $inputAction
|
||||
* @throws Zend_Search_Exception
|
||||
*/
|
||||
public function addRule($sourceState, $input, $targetState, $inputAction = null)
|
||||
{
|
||||
if (!isset($this->_states[$sourceState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $sourceState . ').');
|
||||
}
|
||||
if (!isset($this->_states[$targetState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined target state (' . $targetState . ').');
|
||||
}
|
||||
if (!isset($this->_inputAphabet[$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined input symbol (' . $input . ').');
|
||||
}
|
||||
|
||||
if (!isset($this->_rules[$sourceState])) {
|
||||
$this->_rules[$sourceState] = array();
|
||||
}
|
||||
if (isset($this->_rules[$sourceState][$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Rule for {state,input} pair (' . $sourceState . ', '. $input . ') is already defined.');
|
||||
}
|
||||
|
||||
$this->_rules[$sourceState][$input] = $targetState;
|
||||
|
||||
|
||||
if ($inputAction !== null) {
|
||||
$this->addInputAction($sourceState, $input, $inputAction);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add state entry action.
|
||||
* Several entry actions are allowed.
|
||||
* Action execution order is defined by addEntryAction() calls
|
||||
*
|
||||
* @param integer|string $state
|
||||
* @param Zend_Search_Lucene_FSMAction $action
|
||||
*/
|
||||
public function addEntryAction($state, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
|
||||
if (!isset($this->_entryActions[$state])) {
|
||||
$this->_entryActions[$state] = array();
|
||||
}
|
||||
|
||||
$this->_entryActions[$state][] = $action;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add state exit action.
|
||||
* Several exit actions are allowed.
|
||||
* Action execution order is defined by addEntryAction() calls
|
||||
*
|
||||
* @param integer|string $state
|
||||
* @param Zend_Search_Lucene_FSMAction $action
|
||||
*/
|
||||
public function addExitAction($state, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
|
||||
if (!isset($this->_exitActions[$state])) {
|
||||
$this->_exitActions[$state] = array();
|
||||
}
|
||||
|
||||
$this->_exitActions[$state][] = $action;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add input action (defined by {state, input} pair).
|
||||
* Several input actions are allowed.
|
||||
* Action execution order is defined by addInputAction() calls
|
||||
*
|
||||
* @param integer|string $state
|
||||
* @param integer|string $input
|
||||
* @param Zend_Search_Lucene_FSMAction $action
|
||||
*/
|
||||
public function addInputAction($state, $inputSymbol, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
if (!isset($this->_inputAphabet[$inputSymbol])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined input symbol (' . $inputSymbol. ').');
|
||||
}
|
||||
|
||||
if (!isset($this->_inputActions[$state])) {
|
||||
$this->_inputActions[$state] = array();
|
||||
}
|
||||
if (!isset($this->_inputActions[$state][$inputSymbol])) {
|
||||
$this->_inputActions[$state][$inputSymbol] = array();
|
||||
}
|
||||
|
||||
$this->_inputActions[$state][$inputSymbol][] = $action;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add transition action (defined by {state, input} pair).
|
||||
* Several transition actions are allowed.
|
||||
* Action execution order is defined by addTransitionAction() calls
|
||||
*
|
||||
* @param integer|string $sourceState
|
||||
* @param integer|string $targetState
|
||||
* @param Zend_Search_Lucene_FSMAction $action
|
||||
*/
|
||||
public function addTransitionAction($sourceState, $targetState, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$sourceState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $sourceState. ').');
|
||||
}
|
||||
if (!isset($this->_states[$targetState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $targetState. ').');
|
||||
}
|
||||
|
||||
if (!isset($this->_transitionActions[$sourceState])) {
|
||||
$this->_transitionActions[$sourceState] = array();
|
||||
}
|
||||
if (!isset($this->_transitionActions[$sourceState][$targetState])) {
|
||||
$this->_transitionActions[$sourceState][$targetState] = array();
|
||||
}
|
||||
|
||||
$this->_transitionActions[$sourceState][$targetState][] = $action;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process an input
|
||||
*
|
||||
* @param mixed $input
|
||||
* @throws Zend_Search_Exception
|
||||
*/
|
||||
public function process($input)
|
||||
{
|
||||
if (!isset($this->_rules[$this->_currentState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any rule for current state (' . $this->_currentState . ').');
|
||||
}
|
||||
if (!isset($this->_rules[$this->_currentState][$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any rule for {current state, input} pair (' . $this->_currentState . ', ' . $input . ').');
|
||||
}
|
||||
|
||||
$sourceState = $this->_currentState;
|
||||
$targetState = $this->_rules[$this->_currentState][$input];
|
||||
|
||||
if ($sourceState != $targetState && isset($this->_exitActions[$sourceState])) {
|
||||
foreach ($this->_exitActions[$sourceState] as $action) {
|
||||
$action->doAction();
|
||||
}
|
||||
}
|
||||
if (isset($this->_inputActions[$sourceState]) &&
|
||||
isset($this->_inputActions[$sourceState][$input])) {
|
||||
foreach ($this->_inputActions[$sourceState][$input] as $action) {
|
||||
$action->doAction();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$this->_currentState = $targetState;
|
||||
|
||||
if (isset($this->_transitionActions[$sourceState]) &&
|
||||
isset($this->_transitionActions[$sourceState][$targetState])) {
|
||||
foreach ($this->_transitionActions[$sourceState][$targetState] as $action) {
|
||||
$action->doAction();
|
||||
}
|
||||
}
|
||||
if ($sourceState != $targetState && isset($this->_entryActions[$targetState])) {
|
||||
foreach ($this->_entryActions[$targetState] as $action) {
|
||||
$action->doAction();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function reset()
|
||||
{
|
||||
if (count($this->_states) == 0) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any state defined for FSM.');
|
||||
}
|
||||
|
||||
$this->_currentState = $this->_states[0];
|
||||
}
|
||||
}
|
||||
|
66
thirdparty/Zend/Search/Lucene/FSMAction.php
vendored
Normal file
66
thirdparty/Zend/Search/Lucene/FSMAction.php
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: FSMAction.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Finite State Machine action: a callback bound to an object and a method name
|
||||
*
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_FSMAction
{
    /**
     * Target object the action is invoked on
     *
     * @var object
     */
    private $_object;

    /**
     * Name of the method to invoke on the target object
     *
     * @var string
     */
    private $_method;

    /**
     * Bind an action to an object and one of its methods
     *
     * @param object $object
     * @param string $method
     */
    public function __construct($object, $method)
    {
        $this->_method = $method;
        $this->_object = $object;
    }

    /**
     * Invoke the bound method on the bound object, without arguments.
     * The method's return value is discarded.
     */
    public function doAction()
    {
        $this->_object->{$this->_method}();
    }
}
|
||||
|
226
thirdparty/Zend/Search/Lucene/Field.php
vendored
Normal file
226
thirdparty/Zend/Search/Lucene/Field.php
vendored
Normal file
@ -0,0 +1,226 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Field.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A field is a section of a Document. Each field has two parts,
|
||||
* a name and a value. Values may be free text or they may be atomic
|
||||
* keywords, which are not further processed. Such keywords may
|
||||
* be used to represent dates, urls, etc. Fields are optionally
|
||||
* stored in the index, so that they may be returned with hits
|
||||
* on the document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Field
{
    /**
     * Name of the field
     *
     * @var string
     */
    public $name;

    /**
     * Value of the field
     *
     * @var string
     */
    public $value;

    /**
     * Whether the field is stored in the index for return with search hits
     *
     * @var boolean
     */
    public $isStored    = false;

    /**
     * Whether the field is indexed, so that it may be searched on
     *
     * @var boolean
     */
    public $isIndexed   = true;

    /**
     * Whether the field is tokenized as text prior to indexing
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Whether the field is stored as binary
     *
     * @var boolean
     */
    public $isBinary    = false;

    /**
     * Whether the field is stored as a term vector
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Field boost factor
     * It is not stored directly in the index, but affects the normalization factor
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Encoding of the field value
     *
     * @var string
     */
    public $encoding;

    /**
     * Object constructor
     *
     * For binary fields the encoding is forced to '' and tokenization is
     * switched off, whatever $encoding and $isTokenized say.
     *
     * @param string  $name
     * @param string  $value
     * @param string  $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name  = $name;
        $this->value = $value;

        $this->encoding    = $isBinary ? ''    : $encoding;
        $this->isTokenized = $isBinary ? false : $isTokenized;

        $this->isBinary  = $isBinary;
        $this->isIndexed = $isIndexed;
        $this->isStored  = $isStored;

        $this->boost           = 1.0;
        $this->storeTermVector = false;
    }

    /**
     * Builds a string-valued field that is stored and indexed but not
     * tokenized. Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function keyword($name, $value, $encoding = '')
    {
        $isStored    = true;
        $isIndexed   = true;
        $isTokenized = false;

        return new Zend_Search_Lucene_Field($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Builds a string-valued field that is stored in the index (for return
     * with hits) but is neither indexed nor tokenized.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unIndexed($name, $value, $encoding = '')
    {
        $isStored    = true;
        $isIndexed   = false;
        $isTokenized = false;

        return new Zend_Search_Lucene_Field($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Builds a binary-string-valued field that is stored in the index (for
     * return with hits) but is neither indexed nor tokenized.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function binary($name, $value)
    {
        $isStored    = true;
        $isIndexed   = false;
        $isTokenized = false;
        $isBinary    = true;

        return new Zend_Search_Lucene_Field($name, $value, '', $isStored, $isIndexed, $isTokenized, $isBinary);
    }

    /**
     * Builds a string-valued field that is stored, indexed and tokenized.
     * Useful for short text fields, like "title" or "subject".
     * Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function text($name, $value, $encoding = '')
    {
        $isStored    = true;
        $isIndexed   = true;
        $isTokenized = true;

        return new Zend_Search_Lucene_Field($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Builds a string-valued field that is indexed and tokenized, but is
     * not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unStored($name, $value, $encoding = '')
    {
        $isStored    = false;
        $isIndexed   = true;
        $isTokenized = true;

        return new Zend_Search_Lucene_Field($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Get field value in UTF-8 encoding
     *
     * The value is returned untouched when the field encoding is already
     * 'utf8' / 'utf-8' (case-insensitive); otherwise it is converted with iconv().
     *
     * @return string
     */
    public function getUtf8Value()
    {
        $alreadyUtf8 = strcasecmp($this->encoding, 'utf8')  == 0
                    || strcasecmp($this->encoding, 'utf-8') == 0;

        if ($alreadyUtf8) {
            return $this->value;
        }

        // On AIX the declared field encoding is ignored and ISO8859-1 is used as the source charset.
        $sourceEncoding = (PHP_OS != 'AIX') ? $this->encoding : 'ISO8859-1';

        return iconv($sourceEncoding, 'UTF-8', $this->value);
    }
}
|
||||
|
268
thirdparty/Zend/Search/Lucene/Index/DictionaryLoader.php
vendored
Normal file
268
thirdparty/Zend/Search/Lucene/Index/DictionaryLoader.php
vendored
Normal file
@ -0,0 +1,268 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: DictionaryLoader.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/**
|
||||
* Dictionary loader
|
||||
*
|
||||
* It's a helper class created to encapsulate code that is deliberately not well structured.
|
||||
* Manual "method inlining" is performed to speed up the dictionary index loading operation,
|
||||
* which is a major bottleneck for search performance.
|
||||
*
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_DictionaryLoader
{
    /**
     * Dictionary index loader.
     *
     * It takes a string which is actually <segment_name>.tii index file data and
     * returns two arrays - term and termInfo lists.
     *
     * The data is read as: a 4-byte version, an 8-byte term count, a 4-byte
     * index interval (skipped), a 4-byte skip interval, 4 more skipped bytes
     * for the 2.1+ format, and then one record per term. All variable-length
     * integer reads are inlined by hand for speed.
     *
     * See Zend_Search_Lucene_Index_SegmentInfo class for details
     *
     * @param string $data
     * @return array  array($termDictionary, $termInfos), where each $termDictionary entry is
     *                array($termFieldNum, $termValue) and each $termInfos entry is
     *                array($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer)
     * @throws Zend_Search_Lucene_Exception
     */
    public static function load($data)
    {
        $termDictionary = array();
        $termInfos      = array();
        $pos            = 0;

        // $tiVersion = $tiiFile->readInt();
        $tiVersion = ord($data[0]) << 24 | ord($data[1]) << 16 | ord($data[2]) << 8  | ord($data[3]);
        $pos += 4;
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        // $indexTermCount = $tiiFile->readLong();
        if (PHP_INT_SIZE > 4) {
            $indexTermCount = ord($data[$pos])   << 56  |
                              ord($data[$pos+1]) << 48  |
                              ord($data[$pos+2]) << 40  |
                              ord($data[$pos+3]) << 32  |
                              ord($data[$pos+4]) << 24  |
                              ord($data[$pos+5]) << 16  |
                              ord($data[$pos+6]) << 8   |
                              ord($data[$pos+7]);
        } else {
            // 32-bit integers: the upper 33 bits of the long must all be zero
            if ((ord($data[$pos])            != 0) ||
                (ord($data[$pos+1])          != 0) ||
                (ord($data[$pos+2])          != 0) ||
                (ord($data[$pos+3])          != 0) ||
                ((ord($data[$pos+4]) & 0x80) != 0)) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            $indexTermCount = ord($data[$pos+4]) << 24  |
                              ord($data[$pos+5]) << 16  |
                              ord($data[$pos+6]) << 8   |
                              ord($data[$pos+7]);
        }
        $pos += 8;

        //                  $tiiFile->readInt();  // IndexInterval
        $pos += 4;

        // $skipInterval   = $tiiFile->readInt();
        $skipInterval = ord($data[$pos]) << 24 | ord($data[$pos+1]) << 16 | ord($data[$pos+2]) << 8  | ord($data[$pos+3]);
        $pos += 4;
        if ($indexTermCount < 1) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of terms in a term dictionary index');
        }

        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            /* Skip MaxSkipLevels value */
            $pos += 4;
        }

        $prevTerm     = '';
        $freqPointer  =  0;
        $proxPointer  =  0;
        $indexPointer =  0;
        for ($count = 0; $count < $indexTermCount; $count++) {
            //$termPrefixLength = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termPrefixLength = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termPrefixLength |= ($nbyte & 0x7F) << $shift;
            }

            // $termSuffix       = $tiiFile->readString();
            $nbyte = ord($data[$pos++]);
            $len = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $len |= ($nbyte & 0x7F) << $shift;
            }
            if ($len == 0) {
                $termSuffix = '';
            } else {
                // $len starts as a character count; it is grown below so that
                // it always equals the byte length of $termSuffix.
                $termSuffix = substr($data, $pos, $len);
                $pos += $len;
                for ($count1 = 0; $count1 < $len; $count1++ ) {
                    if (( ord($termSuffix[$count1]) & 0xC0 ) == 0xC0) {
                        $addBytes = 1;
                        if (ord($termSuffix[$count1]) & 0x20 ) {
                            $addBytes++;

                            // Never used for Java Lucene created index.
                            // Java2 doesn't encode strings in four bytes
                            if (ord($termSuffix[$count1]) & 0x10 ) {
                                $addBytes++;
                            }
                        }
                        $termSuffix .= substr($data, $pos, $addBytes);
                        $pos += $addBytes;
                        $len += $addBytes;

                        // Check for null character. Java2 encodes null character
                        // in two bytes.
                        if (ord($termSuffix[$count1]) == 0xC0 &&
                            ord($termSuffix[$count1+1]) == 0x80   ) {
                            // chr(0), not the integer 0: assigning 0 to a string
                            // offset stores the character '0'.
                            $termSuffix[$count1] = chr(0);
                            $termSuffix = substr($termSuffix,0,$count1+1)
                                        . substr($termSuffix,$count1+2);
                            // The string just shrank by one byte and the character
                            // now occupies a single byte, so keep $len in sync and
                            // do not skip over the following byte.
                            $len--;
                        } else {
                            $count1 += $addBytes;
                        }
                    }
                }
            }

            // $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($prevTerm, $termPrefixLength) . $termSuffix;
            $pb = 0; $pc = 0;
            $prevTermLength = strlen($prevTerm);
            while ($pb < $prevTermLength  &&  $pc < $termPrefixLength) {
                $charBytes = 1;
                if ((ord($prevTerm[$pb]) & 0xC0) == 0xC0) {
                    $charBytes++;
                    if (ord($prevTerm[$pb]) & 0x20 ) {
                        $charBytes++;
                        if (ord($prevTerm[$pb]) & 0x10 ) {
                            $charBytes++;
                        }
                    }
                }

                // The bound is the previous term being scanned, not the whole file data
                if ($pb + $charBytes > $prevTermLength) {
                    // wrong character
                    break;
                }

                $pc++;
                $pb += $charBytes;
            }
            $termValue = substr($prevTerm, 0, $pb) . $termSuffix;

            // $termFieldNum     = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termFieldNum = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termFieldNum |= ($nbyte & 0x7F) << $shift;
            }

            // $docFreq          = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $docFreq = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $docFreq |= ($nbyte & 0x7F) << $shift;
            }

            // $freqPointer     += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $freqPointer += $vint;

            // $proxPointer     += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $proxPointer += $vint;

            if( $docFreq >= $skipInterval ) {
                // $skipDelta = $tiiFile->readVInt();
                $nbyte = ord($data[$pos++]);
                $vint = $nbyte & 0x7F;
                for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                    $nbyte = ord($data[$pos++]);
                    $vint |= ($nbyte & 0x7F) << $shift;
                }
                $skipDelta = $vint;
            } else {
                $skipDelta = 0;
            }

            // $indexPointer += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $indexPointer += $vint;

            // $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue, $termFieldNum);
            $termDictionary[] = array($termFieldNum, $termValue);

            // new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
            $termInfos[] = array($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);

            $prevTerm = $termValue;
        }

        // Check special index entry mark
        if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        if (PHP_INT_SIZE > 4) {
            // Treat 64-bit 0xFFFFFFFF as -1
            $termDictionary[0][0] = -1;
        }

        return array($termDictionary, $termInfos);
    }
}
|
||||
|
59
thirdparty/Zend/Search/Lucene/Index/DocsFilter.php
vendored
Normal file
59
thirdparty/Zend/Search/Lucene/Index/DocsFilter.php
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: DocsFilter.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Zend_Search_Lucene_Index_DocsFilter is used to filter documents while searching.
|
||||
*
|
||||
* It may or _may_not_ be used for actual filtering, so it's just a hint that upper query limits
|
||||
* search result by specified list.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_DocsFilter
{
    /**
     * Per-segment document filters.
     *
     * Outer keys are segment names; each value is an array whose keys are
     * the document ids selected within that segment (the values stored
     * under those keys carry no meaning):
     *
     *   array(
     *       <segmentName> => array(<docId> => <undefined_value>, ...),
     *       <segmentName> => array(<docId> => <undefined_value>, ...),
     *       ...
     *   )
     *
     * @var array
     */
    public $segmentFilters = array();
}
|
||||
|
50
thirdparty/Zend/Search/Lucene/Index/FieldInfo.php
vendored
Normal file
50
thirdparty/Zend/Search/Lucene/Index/FieldInfo.php
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: FieldInfo.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_FieldInfo
{
    /** Field name */
    public $name;

    /** Truthy when the field is indexed */
    public $isIndexed;

    /** Field number */
    public $number;

    /** Truthy when term vectors are stored for the field */
    public $storeTermVector;

    /** Truthy when norms are omitted for the field */
    public $normsOmitted;

    /** Truthy when payloads are stored for the field */
    public $payloadsStored;

    /**
     * Plain value holder: every argument is copied verbatim into the
     * public property of the same name.
     *
     * @param mixed $name
     * @param mixed $isIndexed
     * @param mixed $number
     * @param mixed $storeTermVector
     * @param mixed $normsOmitted
     * @param mixed $payloadsStored
     */
    public function __construct($name, $isIndexed, $number, $storeTermVector, $normsOmitted = false, $payloadsStored = false)
    {
        // Identity
        $this->number = $number;
        $this->name   = $name;

        // Flags
        $this->isIndexed       = $isIndexed;
        $this->storeTermVector = $storeTermVector;
        $this->normsOmitted    = $normsOmitted;
        $this->payloadsStored  = $payloadsStored;
    }
}
|
||||
|
2130
thirdparty/Zend/Search/Lucene/Index/SegmentInfo.php
vendored
Normal file
2130
thirdparty/Zend/Search/Lucene/Index/SegmentInfo.php
vendored
Normal file
@ -0,0 +1,2130 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: SegmentInfo.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Similarity */
|
||||
require_once 'Zend/Search/Lucene/Search/Similarity.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_FieldInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
|
||||
{
|
||||
/**
|
||||
* "Full scan vs fetch" boundary.
|
||||
*
|
||||
* If filter selectivity is less than this value, then full scan is performed
|
||||
* (since term entries fetching has some additional overhead).
|
||||
*/
|
||||
const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
|
||||
|
||||
/**
|
||||
* Number of docs in a segment
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_docCount;
|
||||
|
||||
/**
|
||||
* Segment name
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_name;
|
||||
|
||||
/**
|
||||
* Term Dictionary Index
|
||||
*
|
||||
* Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
|
||||
* of performance considerations)
|
||||
* [0] -> $termFieldNum
|
||||
* [1] -> $termValue
|
||||
*
|
||||
* Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDictionary;
|
||||
|
||||
/**
|
||||
* Term Dictionary Index TermInfos
|
||||
*
|
||||
* Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
|
||||
* of performance considerations)
|
||||
* [0] -> $docFreq
|
||||
* [1] -> $freqPointer
|
||||
* [2] -> $proxPointer
|
||||
* [3] -> $skipOffset
|
||||
* [4] -> $indexPointer
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDictionaryInfos;
|
||||
|
||||
/**
|
||||
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fields;
|
||||
|
||||
/**
|
||||
* Field positions in a dictionary.
|
||||
* (Term dictionary contains fields ordered by names)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fieldsDicPositions;
|
||||
|
||||
|
||||
/**
|
||||
* Associative array where the key is the file name and the value is data offset
|
||||
* in a compound segment file (.cfs).
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_segFiles;
|
||||
|
||||
/**
|
||||
* Associative array where the key is the file name and the value is file size (.cfs).
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_segFileSizes;
|
||||
|
||||
/**
|
||||
* Delete file generation number
|
||||
*
|
||||
* -2 means autodetect latest delete generation
|
||||
* -1 means 'there is no delete file'
|
||||
* 0 means pre-2.1 format delete file
|
||||
* X specifies used delete file
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_delGen;
|
||||
|
||||
/**
|
||||
* Segment has single norms file
|
||||
*
|
||||
* If true then one .nrm file is used for all fields
|
||||
* Otherwise .fN files are used
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_hasSingleNormFile;
|
||||
|
||||
/**
|
||||
* Use compound segment file (*.cfs) to collect all other segment files
|
||||
* (excluding .del files)
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_isCompound;
|
||||
|
||||
|
||||
/**
|
||||
* File system adapter.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_Directory_Filesystem
|
||||
*/
|
||||
private $_directory;
|
||||
|
||||
/**
|
||||
* Normalization factors.
|
||||
* An array fieldName => normVector
|
||||
* normVector is a binary string.
|
||||
* Each byte corresponds to an indexed document in a segment and
|
||||
* encodes normalization factor (float value, encoded by
|
||||
* Zend_Search_Lucene_Search_Similarity::encodeNorm())
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_norms = array();
|
||||
|
||||
/**
|
||||
* List of deleted documents.
|
||||
* bitset if bitset extension is loaded or array otherwise.
|
||||
*
|
||||
* @var mixed
|
||||
*/
|
||||
private $_deleted = null;
|
||||
|
||||
/**
|
||||
* $this->_deleted update flag
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_deletedDirty = false;
|
||||
|
||||
/**
|
||||
* True if segment uses shared doc store
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_usesSharedDocStore;
|
||||
|
||||
/*
|
||||
* Shared doc store options.
|
||||
* It's an associative array with the following items:
|
||||
* - 'offset' => $docStoreOffset The starting document in the shared doc store files where this segment's documents begin
|
||||
* - 'segment' => $docStoreSegment The name of the segment that has the shared doc store files.
|
||||
* - 'isCompound' => $docStoreIsCompoundFile True, if compound file format is used for the shared doc store files (.cfx file).
|
||||
*/
|
||||
private $_sharedDocStoreOptions;
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Index_SegmentInfo constructor
|
||||
*
|
||||
* @param Zend_Search_Lucene_Storage_Directory $directory
|
||||
* @param string $name
|
||||
* @param integer $docCount
|
||||
* @param integer $delGen
|
||||
* @param array|null $docStoreOptions
|
||||
* @param boolean $hasSingleNormFile
|
||||
* @param boolean $isCompound
|
||||
*/
|
||||
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
{
    $this->_directory = $directory;
    $this->_name      = $name;
    $this->_docCount  = $docCount;

    // Non-null options mean this segment uses a shared doc store
    if ($docStoreOptions !== null) {
        $this->_usesSharedDocStore    = true;
        $this->_sharedDocStoreOptions = $docStoreOptions;

        if ($docStoreOptions['isCompound']) {
            // Read the .cfx table of contents: entry count, then (data offset, file name) pairs
            $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
            $cfxEntryCount = $cfxFile->readVInt();

            $cfxOffsets = array();
            $cfxSizes   = array();

            for ($entry = 0; $entry < $cfxEntryCount; $entry++) {
                $dataOffset = $cfxFile->readLong();
                if ($entry != 0) {
                    // Previous entry's size is the distance between the two start offsets
                    $cfxSizes[$fileName] = $dataOffset - end($cfxOffsets);
                }
                $fileName              = $cfxFile->readString();
                $cfxOffsets[$fileName] = $dataOffset;
            }
            if ($entry != 0) {
                // Last entry runs up to the end of the .cfx file
                $cfxSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
            }

            $this->_sharedDocStoreOptions['files']     = $cfxOffsets;
            $this->_sharedDocStoreOptions['fileSizes'] = $cfxSizes;
        }
    }

    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if ($isCompound === null) {
        // It's a pre-2.1 segment or isCompound is set to 'unknown':
        // detect the format by trying to open the compound file
        require_once 'Zend/Search/Lucene/Exception.php';
        try {
            $this->_directory->getFileObject($name . '.cfs');

            // Compound file is found
            $this->_isCompound = true;
        } catch (Zend_Search_Lucene_Exception $e) {
            if (strpos($e->getMessage(), 'is not readable') === false) {
                // Any other failure is re-thrown, wrapped with the original as previous exception
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            }

            // Compound file is not found or is not readable
            $this->_isCompound = false;
        }
    } else {
        $this->_isCompound = $isCompound;
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
        // Read the .cfs table of contents the same way as the .cfx one above
        $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
        $cfsEntryCount = $cfsFile->readVInt();

        for ($entry = 0; $entry < $cfsEntryCount; $entry++) {
            $dataOffset = $cfsFile->readLong();
            if ($entry != 0) {
                $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
            }
            $fileName                   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
        if ($entry != 0) {
            $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
        }
    }

    // Field infos (.fnm): field count, then (name, flag byte) per field
    $fnmFile       = $this->openCompoundFile('.fnm');
    $fieldsCount   = $fnmFile->readVInt();
    $sortedNames   = array();
    $sortedNumbers = array();
    $this->_fields = array();

    for ($fieldNum = 0; $fieldNum < $fieldsCount; $fieldNum++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();

        $this->_fields[$fieldNum] = new Zend_Search_Lucene_Index_FieldInfo(
            $fieldName,
            $fieldBits & 0x01 /* field is indexed */,
            $fieldNum,
            $fieldBits & 0x02 /* termvectors are stored */,
            $fieldBits & 0x10 /* norms are omitted */,
            $fieldBits & 0x20 /* payloads are stored */
        );

        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field: use the encoded 1.0 norm for every document
            $this->_norms[$fieldNum] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $sortedNumbers[$fieldNum] = $fieldNum;
        $sortedNames[$fieldNum]   = $fieldName;
    }

    // Sort field numbers by field name, then invert to map field number => position in that order
    array_multisort($sortedNames, SORT_ASC, SORT_REGULAR, $sortedNumbers);
    $this->_fieldsDicPositions = array_flip($sortedNumbers);

    if ($this->_delGen == -2) {
        // SegmentInfo constructor is invoked from index writer
        // Autodetect current delete file generation number
        $this->_delGen = $this->_detectLatestDelGen();
    }

    // Load deletions
    $this->_deleted = $this->_loadDelFile();
}
|
||||
|
||||
/**
|
||||
* Load deletions file
|
||||
*
|
||||
* Returns bitset or an array depending on bitset extension availability
|
||||
*
|
||||
* @return mixed
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _loadDelFile()
{
    // Dispatch on the delete-file generation number
    if ($this->_delGen == -1) {
        // There is no delete file for this segment
        return null;
    }

    if ($this->_delGen == 0) {
        // Pre-2.1 format delete file
        return $this->_loadPre21DelFile();
    }

    // 2.1+ format deletions file
    return $this->_load21DelFile();
}
|
||||
|
||||
/**
|
||||
* Load pre-2.1 deletions file
|
||||
*
|
||||
* Returns bitset or an array depending on bitset extension availability
|
||||
*
|
||||
* @return mixed
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _loadPre21DelFile()
{
    require_once 'Zend/Search/Lucene/Exception.php';
    try {
        // '.del' files always stored in a separate file
        // Segment compound is not used
        $delFile = $this->_directory->getFileObject($this->_name . '.del');

        // First header int is converted to a byte count (divided by 8, rounded up)
        $byteCount = $delFile->readInt();
        $byteCount = ceil($byteCount/8);
        // Second header int: when it is 0 no bitmap bytes are read at all
        $bitCount = $delFile->readInt();

        if ($bitCount == 0) {
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            // The raw byte string is returned as-is when the bitset extension is available
            return $delBytes;
        }

        // Expand the bitmap into an array keyed by deleted document id.
        // Only bytes that were actually read are scanned, so an empty bitmap
        // ($bitCount == 0) no longer triggers string-offset warnings.
        $deletions  = array();
        $bytesAvail = min($byteCount, strlen($delBytes));
        for ($count = 0; $count < $bytesAvail; $count++) {
            $byte = ord($delBytes[$count]);
            for ($bit = 0; $bit < 8; $bit++) {
                if ($byte & (1<<$bit)) {
                    $deletions[$count*8 + $bit] = 1;
                }
            }
        }

        return $deletions;
    } catch(Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') === false) {
            // Not a "missing file" condition: propagate, keeping the original as previous exception
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
        // There is no deletion file
        $this->_delGen = -1;

        return null;
    }
}
|
||||
|
||||
/**
|
||||
* Load 2.1+ format deletions file
|
||||
*
|
||||
* Returns bitset or an array depending on bitset extension availability
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
private function _load21DelFile()
{
    // Delete file name carries the generation number in base 36
    $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
    $delFile     = $this->_directory->getFileObject($delFileName);

    $format = $delFile->readInt();

    if ($format == (int)0xFFFFFFFF) {
        // Marker value: the file holds (gap, non-zero byte) entries instead of a full bitmap
        $useBitset = extension_loaded('bitset');
        $deletions = $useBitset ? bitset_empty() : array();

        // Two more header ints precede the entries; their values are not needed here,
        // but they must be consumed to reach the first entry
        $delFile->readInt();
        $delFile->readInt();

        $delFileSize = $this->_directory->fileLength($delFileName);
        $byteNum = 0;

        // Read ALL entries up to the end of the file. The result is returned only
        // after the loop; returning from inside it would drop every entry but the first.
        do {
            $dgap        = $delFile->readVInt();
            $nonZeroByte = $delFile->readByte();

            // Gaps are relative to the previous entry's byte number
            $byteNum += $dgap;

            for ($bit = 0; $bit < 8; $bit++) {
                if ($nonZeroByte & (1<<$bit)) {
                    if ($useBitset) {
                        bitset_incl($deletions, $byteNum*8 + $bit);
                    } else {
                        $deletions[$byteNum*8 + $bit] = 1;
                    }
                }
            }
        } while ($delFile->tell() < $delFileSize);

        if ($useBitset) {
            return $deletions;
        }

        return (count($deletions) > 0) ? $deletions : null;
    }

    // $format is actually byte count
    $byteCount = ceil($format/8);
    $bitCount  = $delFile->readInt();

    if ($bitCount == 0) {
        $delBytes = '';
    } else {
        $delBytes = $delFile->readBytes($byteCount);
    }

    if (extension_loaded('bitset')) {
        return $delBytes;
    }

    // Expand the bitmap into an array keyed by deleted document id.
    // Only bytes that were actually read are scanned, so an empty bitmap
    // does not trigger string-offset warnings.
    $deletions  = array();
    $bytesAvail = min($byteCount, strlen($delBytes));
    for ($count = 0; $count < $bytesAvail; $count++) {
        $byte = ord($delBytes[$count]);
        for ($bit = 0; $bit < 8; $bit++) {
            if ($byte & (1<<$bit)) {
                $deletions[$count*8 + $bit] = 1;
            }
        }
    }

    return (count($deletions) > 0) ? $deletions : null;
}
|
||||
|
||||
/**
|
||||
* Opens index file stored within compound index file
|
||||
*
|
||||
* @param string $extension
|
||||
* @param boolean $shareHandler
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
* @return Zend_Search_Lucene_Storage_File
|
||||
*/
|
||||
public function openCompoundFile($extension, $shareHandler = true)
{
    if (($extension == '.fdx' || $extension == '.fdt') && $this->_usesSharedDocStore) {
        // Stored-field files come from the shared doc store segment
        $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
        $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';

        if (!$this->_sharedDocStoreOptions['isCompound']) {
            // Shared doc store is kept in separate .fdx/.fdt files
            $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
            // Skip to this segment's first entry (8 bytes per entry)
            $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

            if ($extension == '.fdx') {
                // '.fdx' file is requested
                return $fdxFile;
            } else {
                // '.fdt' file is requested: the .fdx entry holds the start offset within .fdt
                $fdtStartOffset = $fdxFile->readLong();

                $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
                $fdtFile->seek($fdtStartOffset, SEEK_CUR);

                return $fdtFile;
            }
        }

        if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                   . $fdxFName . ' file.' );
        }
        if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                   . $fdtFName . ' file.' );
        }

        // Open shared docstore segment file
        $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
        // Seek to the start of '.fdx' file within compound file
        $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
        // Seek to the start of current segment documents section
        $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

        if ($extension == '.fdx') {
            // '.fdx' file is requested
            return $cfxFile;
        } else {
            // '.fdt' file is requested
            $fdtStartOffset = $cfxFile->readLong();

            // Seek to the start of '.fdt' file within compound file
            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
            // Seek to the start of current segment documents section
            $cfxFile->seek($fdtStartOffset, SEEK_CUR);

            // The positioned handle is the compound file itself; no separate
            // '.fdt' file object exists in this branch
            return $cfxFile;
        }
    }

    $filename = $this->_name . $extension;

    if (!$this->_isCompound) {
        // Segment files are stored separately
        return $this->_directory->getFileObject($filename, $shareHandler);
    }

    if( !isset($this->_segFiles[$filename]) ) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                               . $filename . ' file.' );
    }

    // Position the compound file handle at the start of the requested sub-file
    $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
    $file->seek($this->_segFiles[$filename]);
    return $file;
}
|
||||
|
||||
/**
|
||||
* Get compound file length
|
||||
*
|
||||
* @param string $extension
|
||||
* @return integer
|
||||
*/
|
||||
public function compoundFileLength($extension)
{
    $isStoredFieldsFile = ($extension == '.fdx' || $extension == '.fdt');

    if ($isStoredFieldsFile && $this->_usesSharedDocStore) {
        // Length is taken from the shared doc store segment
        $sharedName = $this->_sharedDocStoreOptions['segment'] . $extension;

        if ($this->_sharedDocStoreOptions['isCompound']) {
            if (isset($this->_sharedDocStoreOptions['fileSizes'][$sharedName])) {
                return $this->_sharedDocStoreOptions['fileSizes'][$sharedName];
            }

            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
                                   . $sharedName . ' file.' );
        }

        return $this->_directory->fileLength($sharedName);
    }

    $segmentFile = $this->_name . $extension;

    // A standalone file in the directory takes precedence over the compound file entry
    if ($this->_directory->fileExists($segmentFile)) {
        return $this->_directory->fileLength($segmentFile);
    }

    if (isset($this->_segFileSizes[$segmentFile])) {
        return $this->_segFileSizes[$segmentFile];
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                           . $segmentFile . ' file.' );
}
|
||||
|
||||
/**
|
||||
* Returns field index or -1 if field is not found
|
||||
*
|
||||
* @param string $fieldName
|
||||
* @return integer
|
||||
*/
|
||||
public function getFieldNum($fieldName)
{
    // -1 is the "not found" result
    $number = -1;

    foreach ($this->_fields as $info) {
        if ($info->name == $fieldName) {
            // First field with a matching name wins
            $number = $info->number;
            break;
        }
    }

    return $number;
}
|
||||
|
||||
/**
|
||||
* Returns field info for specified field
|
||||
*
|
||||
* @param integer $fieldNum
|
||||
* @return Zend_Search_Lucene_Index_FieldInfo
|
||||
*/
|
||||
public function getField($fieldNum)
{
    // Direct lookup in the field list populated by the constructor
    $info = $this->_fields[$fieldNum];

    return $info;
}
|
||||
|
||||
/**
|
||||
* Returns array of fields.
|
||||
* if $indexed parameter is true, then returns only indexed fields.
|
||||
*
|
||||
* @param boolean $indexed
|
||||
* @return array
|
||||
*/
|
||||
public function getFields($indexed = false)
{
    $names = array();

    foreach ($this->_fields as $info) {
        // When only indexed fields are requested, skip the non-indexed ones
        if ($indexed && !$info->isIndexed) {
            continue;
        }

        // Result maps each field name to itself
        $names[$info->name] = $info->name;
    }

    return $names;
}
|
||||
|
||||
/**
|
||||
* Returns array of FieldInfo objects.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getFieldInfos()
{
    // Field list as built by the constructor
    return $this->_fields;
}
|
||||
|
||||
/**
|
||||
* Returns actual deletions file generation number.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getDelGen()
{
    // Current generation number of the deletions file
    return $this->_delGen;
}
|
||||
|
||||
/**
|
||||
* Returns the total number of documents in this segment (including deleted documents).
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function count()
{
    // Document count as passed to the constructor; deletions are not subtracted
    return $this->_docCount;
}
|
||||
|
||||
/**
|
||||
* Returns number of deleted documents.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
private function _deletedCount()
{
    // No deletions loaded
    if ($this->_deleted === null) {
        return 0;
    }

    // With the bitset extension the deletions are converted to an array before counting
    $deletedIds = extension_loaded('bitset')
                ? bitset_to_array($this->_deleted)
                : $this->_deleted;

    return count($deletedIds);
}
|
||||
|
||||
/**
|
||||
* Returns the total number of non-deleted documents in this segment.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function numDocs()
{
    if (!$this->hasDeletions()) {
        // Nothing deleted: every document counts
        return $this->_docCount;
    }

    return $this->_docCount - $this->_deletedCount();
}
|
||||
|
||||
/**
|
||||
* Get field position in a fields dictionary
|
||||
*
|
||||
* @param integer $fieldNum
|
||||
* @return integer
|
||||
*/
|
||||
private function _getFieldPosition($fieldNum)
{
    if (isset($this->_fieldsDicPositions[$fieldNum])) {
        return $this->_fieldsDicPositions[$fieldNum];
    }

    // Treat values which are not in a translation table as a 'direct value'
    return $fieldNum;
}
|
||||
|
||||
/**
|
||||
* Return segment name
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function getName()
{
    // Segment name as passed to the constructor
    return $this->_name;
}
|
||||
|
||||
|
||||
/**
|
||||
* TermInfo cache
|
||||
*
|
||||
* Size is 1024.
|
||||
* Numbers are used instead of class constants because of performance considerations
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termInfoCache = array();
|
||||
|
||||
private function _cleanUpTermInfoCache()
{
    // Drop entries from the front of the cache (oldest insertion order first)
    // and stop as soon as exactly 768 entries remain
    foreach (array_keys($this->_termInfoCache) as $cacheKey) {
        unset($this->_termInfoCache[$cacheKey]);

        if (count($this->_termInfoCache) == 768) {
            break;
        }
    }
}
|
||||
|
||||
/**
|
||||
* Load terms dictionary index
|
||||
*
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _loadDictionaryIndex()
{
    $stiName = $this->_name . '.sti';

    // Check, if index is already serialized
    if ($this->_directory->fileExists($stiName)) {
        // Load serialized dictionary index data
        $stiFile     = $this->_directory->getFileObject($stiName);
        $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiName));

        // NOTE(review): unserialize() runs on the on-disk .sti content; this is only
        // safe while the index directory is not writable by untrusted parties.
        $unserializedData = @unserialize($stiFileData);

        // Accept the cached data only when it has the expected shape (a pair of arrays).
        // Anything else is treated as a corrupted cache and rebuilt from the .tii file below.
        if (is_array($unserializedData)
            && isset($unserializedData[0], $unserializedData[1])
            && is_array($unserializedData[0])
            && is_array($unserializedData[1])
        ) {
            list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
            return;
        }
    }

    // Load data from .tii file and generate .sti file

    // Prefetch dictionary index data
    $tiiFile     = $this->openCompoundFile('.tii');
    $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

    /** Zend_Search_Lucene_Index_DictionaryLoader */
    require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';

    // Load dictionary index data
    list($this->_termDictionary, $this->_termDictionaryInfos) =
                Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

    // Cache the parsed dictionary index so the next load can skip .tii parsing
    $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
    $stiFile     = $this->_directory->createFile($stiName);
    $stiFile->writeBytes($stiFileData);
}
|
||||
|
||||
/**
 * Scans terms dictionary and returns term info
 *
 * Lookup order: the in-memory term info cache first, then a binary search
 * over the loaded dictionary index, and finally a sequential scan of the
 * .tis file starting from the closest preceding indexed term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return Zend_Search_Lucene_Index_TermInfo|null  null if the field or the term is not found in this segment
 * @throws Zend_Search_Lucene_Exception  if the .tis file header carries an unknown format version
 */
public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
{
    $termKey = $term->key();
    if (isset($this->_termInfoCache[$termKey])) {
        $termInfo = $this->_termInfoCache[$termKey];

        // Move termInfo to the end of cache
        unset($this->_termInfoCache[$termKey]);
        $this->_termInfoCache[$termKey] = $termInfo;

        return $termInfo;
    }

    if ($this->_termDictionary === null) {
        $this->_loadDictionaryIndex();
    }

    $searchField = $this->getFieldNum($term->field);

    if ($searchField == -1) {
        // Field is unknown to this segment
        return null;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // Binary search for the term in the dictionary index
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary) - 1;
    while ($highIndex >= $lowIndex) {
        $mid     = ($highIndex + $lowIndex) >> 1;
        $midTerm = $this->_termDictionary[$mid];

        // Order by field position first, then by term text
        $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
        $delta    = $searchDicField - $fieldNum;
        if ($delta == 0) {
            $delta = strcmp($term->text, $midTerm[1] /* text */);
        }

        if ($delta < 0) {
            $highIndex = $mid - 1;
        } elseif ($delta > 0) {
            $lowIndex = $mid + 1;
        } else {
            // Exact hit in the dictionary index
            $a = $this->_termDictionaryInfos[$mid];
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

            // Put loaded termInfo into cache
            $this->_termInfoCache[$termKey] = $termInfo;

            // Keep the cache bounded on this insertion path as well
            if (count($this->_termInfoCache) >= 1024) {
                $this->_cleanUpTermInfoCache();
            }

            return $termInfo;
        }
    }

    if ($highIndex == -1) {
        // Term is out of the dictionary range
        return null;
    }

    // Closest indexed term which precedes the searched one
    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    $tisFile = $this->openCompoundFile('.tis');
    $tiVersion = $tisFile->readInt();
    if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
        $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termCount     = $tisFile->readLong();
    $indexInterval = $tisFile->readInt();
    $skipInterval  = $tisFile->readInt();
    if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
        // Value itself is not used here, but it has to be consumed from the header
        $maxSkipLevels = $tisFile->readInt();
    }

    // The header has been consumed already, so its size is subtracted from the relative seek
    $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);

    $termValue    = $prevTerm[1] /* text */;
    $termFieldNum = $prevTerm[0] /* field */;
    $freqPointer  = $prevTermInfo[1] /* freqPointer */;
    $proxPointer  = $prevTermInfo[2] /* proxPointer */;

    // Defined up front so they are never read uninitialised after the scan
    $docFreq    = 0;
    $skipOffset = 0;

    // Sequentially decode term records until we reach or pass the searched term
    for ($count = $prevPosition*$indexInterval + 1;
         $count <= $termCount &&
         ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
          ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
           strcmp($termValue, $term->text) < 0) );
         $count++) {
        $termPrefixLength = $tisFile->readVInt();
        $termSuffix       = $tisFile->readString();
        $termFieldNum     = $tisFile->readVInt();
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

        $docFreq      = $tisFile->readVInt();
        $freqPointer += $tisFile->readVInt();
        $proxPointer += $tisFile->readVInt();
        if( $docFreq >= $skipInterval ) {
            $skipOffset = $tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }
    }

    // strcmp() instead of loose "==": a loose comparison treats numeric-looking
    // strings such as "1000" and "1e3" as equal and would return the wrong term
    if ($termFieldNum == $searchField && strcmp($termValue, $term->text) === 0) {
        $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
    } else {
        $termInfo = null;
    }

    // Put loaded termInfo into cache
    $this->_termInfoCache[$termKey] = $termInfo;

    // ">=" rather than "==": the bound must still trigger if the size ever got past 1024
    if (count($this->_termInfoCache) >= 1024) {
        $this->_cleanUpTermInfoCache();
    }

    return $termInfo;
}
|
||||
|
||||
/**
 * Collects the IDs of all documents of this segment which contain the term.
 *
 * If a documents filter is given, matches are limited to the documents the
 * filter already lists for this segment (when it has an entry for it), and
 * the filter entry for this segment is replaced with the matched document IDs.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to every document ID put into the result
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array  list of (shifted) document IDs
 * @throws Zend_Search_Lucene_Exception  if $docsFilter is neither null nor a documents filter object
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $info      = $this->getTermInfo($term);
    $hasFilter = ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter);

    if (!($info instanceof Zend_Search_Lucene_Index_TermInfo)) {
        // Term is absent from the segment, so nothing passes the filter here
        if ($hasFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frq = $this->openCompoundFile('.frq');
    $frq->seek($info->freqPointer,SEEK_CUR);

    if ($docsFilter !== null && !$hasFilter) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
    }

    // null stands for "no restriction": every posting is accepted
    $allowed = null;
    if ($hasFilter && isset($docsFilter->segmentFilters[$this->_name])) {
        $allowed = $docsFilter->segmentFilters[$this->_name];

        // An empty filter cannot match anything
        if (count($allowed) == 0) {
            return array();
        }
    }

    $docIds  = array();
    $matched = array();
    $docId   = 0;

    for ($n = 0; $n < $info->docFreq; $n++) {
        $delta = $frq->readVInt();
        if ($delta % 2 == 1) {
            $docId += ($delta - 1)/2;
        } else {
            $docId += $delta/2;
            // An even delta is followed by a frequency value, which is not needed here
            $frq->readVInt();
        }

        if ($allowed !== null && !isset($allowed[$docId])) {
            continue;
        }

        $docIds[] = $shift + $docId;
        if ($hasFilter) {
            $matched[$docId] = 1; // the value is irrelevant, only the key is used
        }
    }

    if ($hasFilter) {
        $docsFilter->segmentFilters[$this->_name] = $matched;
    }

    return $docIds;
}
|
||||
|
||||
/**
 * Returns term freqs array.
 * Result array structure: array(docId => freq, ...)
 *
 * If a documents filter is given, matches are limited to the documents the
 * filter already lists for this segment (when it has an entry for it), and
 * the filter entry for this segment is replaced with the matched document IDs.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to every document ID used as a result key
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if $docsFilter is neither null nor a documents filter object
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $termInfo  = $this->getTermInfo($term);
    $hasFilter = ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter);

    if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
        // Term is absent from the segment, so nothing passes the filter here
        if ($hasFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

    if ($docsFilter !== null && !$hasFilter) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
    }

    // null stands for "no restriction": every posting is accepted
    $filter = null;
    if ($hasFilter && isset($docsFilter->segmentFilters[$this->_name])) {
        $filter = $docsFilter->segmentFilters[$this->_name];

        // Check if filter is not empty
        if (count($filter) == 0) {
            return array();
        }
    }

    $result            = array();
    $updatedFilterData = array();
    $docId             = 0;

    for ($count = 0; $count < $termInfo->docFreq; $count++) {
        $docDelta = $frqFile->readVInt();
        if ($docDelta % 2 == 1) {
            // Odd delta: frequency is 1 and is not stored separately
            $docId += ($docDelta-1)/2;
            $freq   = 1;
        } else {
            $docId += $docDelta/2;
            // The frequency value must be consumed even when the document is
            // rejected by the filter, otherwise it would be misread as the
            // next document delta and all following postings would be wrong
            $freq = $frqFile->readVInt();
        }

        if ($filter !== null && !isset($filter[$docId])) {
            continue;
        }

        $result[$shift + $docId] = $freq;
        if ($hasFilter) {
            $updatedFilterData[$docId] = 1; // the value is irrelevant, only the key is used
        }
    }

    if ($hasFilter) {
        $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
    }

    return $result;
}
|
||||
|
||||
/**
 * Returns term positions array.
 * Result array structure: array(docId => array(pos1, pos2, ...), ...)
 *
 * If a documents filter is given, matches are limited to the documents the
 * filter already lists for this segment (when it has an entry for it), and
 * the filter entry for this segment is replaced with the matched document IDs.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to every document ID used as a result key
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if $docsFilter is neither null nor a documents filter object
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $info      = $this->getTermInfo($term);
    $hasFilter = ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter);

    if (!($info instanceof Zend_Search_Lucene_Index_TermInfo)) {
        // Term is absent from the segment, so nothing passes the filter here
        if ($hasFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frq = $this->openCompoundFile('.frq');
    $frq->seek($info->freqPointer,SEEK_CUR);

    if ($docsFilter !== null && !$hasFilter) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
    }

    // null stands for "no restriction": every document is accepted
    $allowed = null;
    if ($hasFilter && isset($docsFilter->segmentFilters[$this->_name])) {
        $allowed = $docsFilter->segmentFilters[$this->_name];

        // An empty filter cannot match anything
        if (count($allowed) == 0) {
            return array();
        }
    }

    // Pass 1: decode docId => frequency pairs from the .frq data
    $freqs = array();
    $docId = 0;
    for ($n = 0; $n < $info->docFreq; $n++) {
        $delta = $frq->readVInt();
        if ($delta % 2 == 1) {
            $docId += ($delta - 1)/2;
            $freqs[$docId] = 1;
        } else {
            $docId += $delta/2;
            $freqs[$docId] = $frq->readVInt();
        }
    }

    // Pass 2: decode positions from the .prx data
    $matched   = array();
    $positions = array();
    $prx = $this->openCompoundFile('.prx');
    $prx->seek($info->proxPointer, SEEK_CUR);

    foreach ($freqs as $id => $freq) {
        // Positions are read for every document, even for ones rejected by the
        // filter, to reach the right file position for the next document
        $current = 0;
        $docPositions = array();
        for ($i = 0; $i < $freq; $i++) {
            $current += $prx->readVInt();
            $docPositions[] = $current;
        }

        if ($allowed !== null && !isset($allowed[$id])) {
            continue;
        }

        if ($hasFilter) {
            $matched[$id] = 1; // the value is irrelevant, only the key is used
        }
        $positions[$shift + $id] = $docPositions;
    }

    if ($hasFilter) {
        $docsFilter->segmentFilters[$this->_name] = $matched;
    }

    return $positions;
}
|
||||
|
||||
/**
 * Load normalization factors from an index file
 *
 * With a single norms file (.nrm) the norms of all indexed fields are loaded
 * at once; otherwise only the per-field file of the requested field is read.
 *
 * @param integer $fieldNum
 * @throws Zend_Search_Lucene_Exception  if the .nrm file header is not recognised
 */
private function _loadNorm($fieldNum)
{
    if (!$this->_hasSingleNormFile) {
        // Norms of each field are kept in a separate '.f<fieldNum>' file
        $perFieldFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $perFieldFile->readBytes($this->_docCount);

        return;
    }

    $nrmFile = $this->openCompoundFile('.nrm');

    $signature     = $nrmFile->readBytes(3);
    $formatVersion = $nrmFile->readByte();

    if (!($signature == 'NRM' && $formatVersion == (int)0xFF)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
    }

    // One run of _docCount bytes is read for each indexed field, in field order
    foreach ($this->_fields as $num => $info) {
        if (!$info->isIndexed) {
            continue;
        }

        $this->_norms[$num] = $nrmFile->readBytes($this->_docCount);
    }
}
|
||||
|
||||
/**
 * Returns normalization factor for specified documents
 *
 * @param integer $id         internal document id within this segment
 * @param string  $fieldName
 * @return float|null  null if the field is unknown to this segment or is not indexed
 */
public function norm($id, $fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    // getFieldNum() reports an unknown field as -1; there is no $this->_fields
    // entry to inspect in that case, so bail out before dereferencing it
    if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
        return null;
    }

    // Norms are loaded lazily on first access
    if (!isset($this->_norms[$fieldNum])) {
        $this->_loadNorm($fieldNum);
    }

    // Norms are kept as a byte string, one byte per document
    return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
}
|
||||
|
||||
/**
 * Returns norm vector, encoded in a byte string
 *
 * For a field which is unknown to the segment or is not indexed, a vector
 * filled with the default similarity's encoded norm for a zero-length field
 * is returned.
 *
 * @param string $fieldName
 * @return string  one byte per document of the segment
 */
public function normVector($fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);
    $hasNorms = ($fieldNum != -1) && $this->_fields[$fieldNum]->isIndexed;

    if ($hasNorms) {
        // Norms are loaded lazily on first access
        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
    $encodedNorm = $similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) );

    return str_repeat(chr($encodedNorm), $this->_docCount);
}
|
||||
|
||||
|
||||
/**
 * Tells whether this index segment has a deletions set
 * (i.e. the internal deletions storage is not null).
 *
 * @return boolean
 */
public function hasDeletions()
{
    return !is_null($this->_deleted);
}
|
||||
|
||||
|
||||
/**
 * Tells whether the segment keeps its norms in a single norms file.
 *
 * @return boolean
 */
public function hasSingleNormFile()
{
    // Cast guarantees a strict boolean regardless of the stored flag type
    return (bool) $this->_hasSingleNormFile;
}
|
||||
|
||||
/**
 * Tells whether the segment is stored using a compound segment file.
 *
 * The stored flag is returned as is.
 *
 * @return boolean
 */
public function isCompound()
{
    return $this->_isCompound;
}
|
||||
|
||||
/**
 * Marks a document of this segment as deleted.
 * $id is an internal document id
 *
 * The change is only kept in memory and flagged as dirty here.
 *
 * @param integer $id
 */
public function delete($id)
{
    $this->_deletedDirty = true;

    if (!extension_loaded('bitset')) {
        // Plain PHP fallback: deleted ids are stored as array keys
        if ($this->_deleted === null) {
            $this->_deleted = array();
        }
        $this->_deleted[$id] = 1;

        return;
    }

    // 'bitset' extension is available: keep deletions as a bit set
    if ($this->_deleted === null) {
        $this->_deleted = bitset_empty($id);
    }
    bitset_incl($this->_deleted, $id);
}
|
||||
|
||||
/**
 * Checks whether a document is marked as deleted
 *
 * @param integer $id  internal document id
 * @return boolean
 */
public function isDeleted($id)
{
    // No deletions storage at all means nothing has been deleted
    if ($this->_deleted === null) {
        return false;
    }

    // Storage format depends on the availability of the 'bitset' extension
    return extension_loaded('bitset')
         ? bitset_in($this->_deleted, $id)
         : isset($this->_deleted[$id]);
}
|
||||
|
||||
/**
 * Detect latest delete generation
 *
 * Is actually used from writeChanges() method or from the constructor if it's invoked from
 * Index writer. In both cases index write lock is already obtained, so we shouldn't care
 * about it
 *
 * @return integer  -1 if the segment has no deletions file, 0 for a plain
 *                  <segment_name>.del file, otherwise the highest generation number found
 */
private function _detectLatestDelGen()
{
    // The segment name is escaped so that it is always matched literally
    $generationPattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

    $delFileList = array();
    foreach ($this->_directory->fileList() as $file) {
        if ($file == $this->_name . '.del') {
            // Matches <segment_name>.del file name
            $delFileList[] = 0;
        } else if (preg_match($generationPattern, $file, $matches)) {
            // Matches <segment_name>_NNN.del file names (NNN is a base-36 generation number)
            $delFileList[] = (int)base_convert($matches[1], 36, 10);
        }
    }

    if (count($delFileList) == 0) {
        // There is no deletions file for current segment in the directory
        // Report it with the special generation number -1
        return -1;
    }

    // There are some deletions files for current segment in the directory
    // Report the highest generation number
    return max($delFileList);
}
|
||||
|
||||
/**
 * Write changes if it's necessary.
 *
 * This method must be invoked only from the Writer _updateSegments() method,
 * so index Write lock has to be already obtained.
 *
 * If the current process made no deletions, a newer deletions file (if any) is
 * just reloaded. Otherwise current deletions are merged with the latest
 * deletions file and written as the next deletions file generation.
 *
 * @internal
 * @throws Zend_Search_Lucene_Exception
 */
public function writeChanges()
{
    // Generation of the newest deletions file present in the directory
    $newestGen = $this->_detectLatestDelGen();

    if (!$this->_deletedDirty) {
        // Current process has not deleted anything

        if ($newestGen == $this->_delGen) {
            // Nobody else has updated the deletions file either
            return;
        }

        if ($newestGen > $this->_delGen) {
            // A concurrent process has written a newer deletions file: reload it
            $this->_delGen  = $newestGen;
            $this->_deleted = $this->_loadDelFile();

            return;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
    }

    $useBitset = extension_loaded('bitset');

    if ($newestGen > $this->_delGen) {
        // Combine our deletions with the ones from the newest deletions file
        $this->_delGen = $newestGen;

        $storedDeletions = $this->_loadDelFile();

        if ($useBitset) {
            $this->_deleted = bitset_union($this->_deleted, $storedDeletions);
        } else {
            $this->_deleted += $storedDeletions;
        }
    }

    if ($useBitset) {
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    } else {
        // Pack deleted ids into a byte string, 8 documents per byte, lowest bit first
        $byteCount = floor($this->_docCount/8)+1;
        $delBytes  = str_repeat(chr(0), $byteCount);
        for ($byteNum = 0; $byteNum < $byteCount; $byteNum++) {
            $packed = 0;
            for ($bitNum = 0; $bitNum < 8; $bitNum++) {
                if (isset($this->_deleted[$byteNum*8 + $bitNum])) {
                    $packed |= (1<<$bitNum);
                }
            }
            $delBytes[$byteNum] = chr($packed);
        }
        $bitCount = count($this->_deleted);
    }

    // Move on to the next deletions file generation (the first one is 1)
    if ($this->_delGen == -1) {
        $this->_delGen = 1;
    } else {
        $this->_delGen++;
    }

    $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
    $delFile     = $this->_directory->createFile($delFileName);
    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
|
||||
|
||||
|
||||
/**
 * Term dictionary (.tis) file object used for stream-like terms reading
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_tisFile = null;

/**
 * Offset of the .tis file data
 * (file position taken right after the file is opened by resetTermsStream())
 *
 * @var integer
 */
private $_tisFileOffset;

/**
 * Frequencies (.frq) file object used for stream-like terms reading
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_frqFile = null;

/**
 * Offset of the .frq file data
 * (file position taken right after the file is opened by resetTermsStream())
 *
 * @var integer
 */
private $_frqFileOffset;

/**
 * Positions (.prx) file object used for stream-like terms reading
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_prxFile = null;

/**
 * Offset of the .prx file data in the compound file
 * (file position taken right after the file is opened by resetTermsStream())
 *
 * @var integer
 */
private $_prxFileOffset;


/**
 * Actual number of terms in term stream
 *
 * @var integer
 */
private $_termCount = 0;

/**
 * Overall number of terms in term stream
 *
 * @var integer
 */
private $_termNum = 0;

/**
 * Index interval of the segment, as read from the .tis file header
 *
 * @var integer
 */
private $_indexInterval;

/**
 * Skip interval of the segment, as read from the .tis file header
 *
 * @var integer
 */
private $_skipInterval;

/**
 * TermInfo of the last term read from the terms stream
 *
 * @var Zend_Search_Lucene_Index_TermInfo
 */
private $_lastTermInfo = null;

/**
 * Last term read from the terms stream
 *
 * @var Zend_Search_Lucene_Index_Term
 */
private $_lastTerm = null;

/**
 * Map of the document IDs
 * Used to get a new docID after removing deleted documents.
 * It's not very efficient from the memory usage point of view,
 * but much faster than other methods
 *
 * @var array|null
 */
private $_docMap = null;

/**
 * An array of all term positions in the documents.
 * Array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Is set to null if term positions loading has to be skipped
 *
 * @var array|null
 */
private $_lastTermPositions;


/** Scan modes */
const SM_TERMS_ONLY = 0; // terms are scanned, no additional info is retrieved
const SM_FULL_INFO  = 1; // terms are scanned, frequency and position info is retrieved
const SM_MERGE_INFO = 2; // terms are scanned, frequency and position info is retrieved
                         // document numbers are compacted (shifted if segment contains deleted documents)

/**
 * Terms scan mode
 *
 * One of:
 *
 * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
 * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
 * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
 *                       document numbers are compacted (shifted if segment has deleted documents)
 *
 * @var integer
 */
private $_termsScanMode;
|
||||
|
||||
/**
 * Reset terms stream
 *
 * Re-opens the term dictionary, re-reads its header and positions the stream
 * at the first term. For the SM_FULL_INFO and SM_MERGE_INFO modes the
 * frequencies and positions files are opened as well and the document ID map
 * is built (deleted documents are left out of the map).
 *
 * The parameters are taken through func_get_args():
 *   $startId - id for the first document (default 0)
 *   $mode    - terms scan mode (default self::SM_TERMS_ONLY)
 *
 * Returns start document id for the next segment
 *
 * @param integer $startId
 * @param integer $mode
 * @throws Zend_Search_Lucene_Exception
 * @return integer
 */
public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
{
    // Both parameters are optional and are picked up from the actual argument list
    $args     = func_get_args();
    $argCount = count($args);
    if ($argCount > 2) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
    }
    $startId = ($argCount >= 1) ? $args[0] : 0;
    $mode    = ($argCount == 2) ? $args[1] : self::SM_TERMS_ONLY;

    // Drop the previous file object (if any) before opening a new one
    $this->_tisFile = null;

    $this->_tisFile       = $this->openCompoundFile('.tis', false);
    $this->_tisFileOffset = $this->_tisFile->tell();

    $tiVersion   = $this->_tisFile->readInt();
    $isPre21     = ($tiVersion == (int)0xFFFFFFFE); /* pre-2.1 format */
    $is21OrLater = ($tiVersion == (int)0xFFFFFFFD); /* 2.1+ format */
    if (!$isPre21 && !$is21OrLater) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termsTotal           = $this->_tisFile->readLong(); // Read terms count
    $this->_termNum       = $termsTotal;
    $this->_termCount     = $termsTotal;
    $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
    $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
    if ($is21OrLater) {
        // maxSkipLevels: the value is not used, but it has to be consumed from the header
        $this->_tisFile->readInt();
    }

    // Drop state left over from a previous scan
    $this->_frqFile = null;
    $this->_prxFile = null;
    $this->_docMap  = array();

    $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
    $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
    $this->_lastTermPositions = null;

    $this->_termsScanMode = $mode;

    // In merge mode document numbers are compacted (deleted documents are skipped in numbering)
    $compactIds = ($mode == self::SM_MERGE_INFO);

    if ($mode == self::SM_TERMS_ONLY) {
        // Nothing else has to be prepared
    } elseif ($mode == self::SM_FULL_INFO || $compactIds) {
        $this->_frqFile       = $this->openCompoundFile('.frq', false);
        $this->_frqFileOffset = $this->_frqFile->tell();

        $this->_prxFile       = $this->openCompoundFile('.prx', false);
        $this->_prxFileOffset = $this->_prxFile->tell();

        for ($docNum = 0; $docNum < $this->_docCount; $docNum++) {
            if ($this->isDeleted($docNum)) {
                continue;
            }

            $this->_docMap[$docNum] = $startId + ($compactIds ? count($this->_docMap) : $docNum);
        }
    } else {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
    }

    // Calculate next segment start id (since $this->_docMap structure may be cleaned by $this->nextTerm() call)
    $nextSegmentStartId = $startId + ($compactIds ? count($this->_docMap) : $this->_docCount);
    $this->nextTerm();

    return $nextSegmentStartId;
}
|
||||
|
||||
|
||||
/**
 * Skip terms stream up to the specified term prefix.
 *
 * Prefix contains fully specified field info and a portion of the searched term.
 *
 * The in-memory dictionary index is binary-searched for the entry to start from,
 * the '.tis' file is repositioned using the offset stored with that entry, and the
 * stream is then scanned forward with nextTerm(). On return currentTerm() is the
 * first term found that is equal to or greater than the prefix, or null when the
 * field is absent from this segment or the end of the stream was reached.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 * @throws Zend_Search_Lucene_Exception
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    if ($this->_termDictionary === null) {
        $this->_loadDictionaryIndex();
    }

    $searchField = $this->getFieldNum($prefix->field);

    if ($searchField == -1) {
        /**
         * Field is not present in this segment
         * Go to the end of dictionary
         */
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;

        return;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // Binary search for the appropriate entry in the dictionary index.
    // When the loop runs to completion, $highIndex is the last entry that sorts
    // before the prefix (or -1 if there is none).
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary)-1;
    while ($highIndex >= $lowIndex) {
        $mid     = ($highIndex + $lowIndex) >> 1;
        $midTerm = $this->_termDictionary[$mid];

        $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
        $delta    = $searchDicField - $fieldNum;
        if ($delta == 0) {
            $delta = strcmp($prefix->text, $midTerm[1] /* text */);
        }

        if ($delta < 0) {
            $highIndex = $mid-1;
        } elseif ($delta > 0) {
            $lowIndex  = $mid+1;
        } else {
            // We have reached the term we are looking for.
            // Pin $highIndex to the matching entry: without this it keeps the
            // current upper search bound, so the stream would be positioned at a
            // later index entry and terms matching the prefix would be skipped.
            $highIndex = $mid;
            break;
        }
    }

    if ($highIndex == -1) {
        // Term is out of the dictionary range
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;

        return;
    }

    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    if ($this->_tisFile === null) {
        // The end of terms stream is reached and terms dictionary file is closed
        // Perform mini-reset operation
        $this->_tisFile = $this->openCompoundFile('.tis', false);

        if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
            $this->_frqFile = $this->openCompoundFile('.frq', false);
            $this->_prxFile = $this->openCompoundFile('.prx', false);
        }
    }
    // Element 4 of the index entry info is used as the offset into the '.tis' data
    $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

    $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                             ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
    $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                 $prevTermInfo[1] /* freqPointer */,
                                                                 $prevTermInfo[2] /* proxPointer */,
                                                                 $prevTermInfo[3] /* skipOffset */);
    // Number of terms still to be read after the selected index entry
    $this->_termCount    =  $this->_termNum - $prevPosition*$this->_indexInterval;

    if ($highIndex == 0) {
        // skip start entry
        $this->nextTerm();
    } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
        // We got exact match in the dictionary index

        if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
            $this->_lastTermPositions = array();

            // Decode document ids and frequencies: an odd delta means frequency 1,
            // an even delta is followed by an explicit frequency value.
            $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
            $freqs = array();   $docId = 0;
            for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                $docDelta = $this->_frqFile->readVInt();
                if( $docDelta % 2 == 1 ) {
                    $docId += ($docDelta-1)/2;
                    $freqs[ $docId ] = 1;
                } else {
                    $docId += $docDelta/2;
                    $freqs[ $docId ] = $this->_frqFile->readVInt();
                }
            }

            // Positions are stored as deltas from the previous position
            $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
            foreach ($freqs as $docId => $freq) {
                $termPosition = 0;  $positions = array();

                for ($count = 0; $count < $freq; $count++ ) {
                    $termPosition += $this->_prxFile->readVInt();
                    $positions[] = $termPosition;
                }

                // Documents missing from the doc map are left out of the result
                if (isset($this->_docMap[$docId])) {
                    $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                }
            }
        }

        return;
    }

    // Search term matching specified prefix
    while ($this->_lastTerm !== null) {
        if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
             ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
            // Current term matches or is greater than the pattern
            return;
        }

        $this->nextTerm();
    }
}
|
||||
|
||||
|
||||
/**
 * Scans terms dictionary and returns next term
 *
 * Reads one entry from the '.tis' file, rebuilds the term text from the previous
 * term plus the stored suffix, and accumulates the frequency/position pointers on
 * top of the previous term info. In SM_FULL_INFO and SM_MERGE_INFO scan modes the
 * term positions are loaded as well. When the stream is exhausted all stream state
 * is released and null is returned.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    if ($this->_tisFile === null  ||  $this->_termCount == 0) {
        $this->_lastTerm          = null;
        $this->_lastTermInfo      = null;
        $this->_lastTermPositions = null;
        $this->_docMap            = null;

        // may be necessary for "empty" segment
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;

        return null;
    }

    // Term entry: shared prefix length, suffix, field number
    $sharedPrefixLength = $this->_tisFile->readVInt();
    $suffix             = $this->_tisFile->readString();
    $fieldNumber        = $this->_tisFile->readVInt();
    $text               = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedPrefixLength) . $suffix;

    $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNumber]->name);

    // Term info: pointers are stored as deltas against the previous term info
    $docFreq     = $this->_tisFile->readVInt();
    $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
    $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
    // Skip offset is only present in the file for sufficiently frequent terms
    $skipOffset  = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

    $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

    $withPositions = ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO);
    if ($withPositions) {
        $this->_lastTermPositions = array();

        // Decode document ids and frequencies: an odd delta means frequency 1,
        // an even delta is followed by an explicit frequency value.
        $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
        $docFreqs  = array();
        $currentId = 0;
        $remaining = $this->_lastTermInfo->docFreq;
        for ($read = 0; $read < $remaining; $read++) {
            $delta = $this->_frqFile->readVInt();
            if ($delta % 2 == 1) {
                $currentId += ($delta-1)/2;
                $docFreqs[$currentId] = 1;
            } else {
                $currentId += $delta/2;
                $docFreqs[$currentId] = $this->_frqFile->readVInt();
            }
        }

        // Positions are stored as deltas from the previous position
        $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
        foreach ($docFreqs as $segmentDocId => $occurrences) {
            $position     = 0;
            $docPositions = array();

            for ($read = 0; $read < $occurrences; $read++) {
                $position       += $this->_prxFile->readVInt();
                $docPositions[]  = $position;
            }

            // Documents missing from the doc map are left out of the result
            if (isset($this->_docMap[$segmentDocId])) {
                $this->_lastTermPositions[$this->_docMap[$segmentDocId]] = $docPositions;
            }
        }
    }

    $this->_termCount--;
    if ($this->_termCount == 0) {
        // Last term has been read: release the file handles
        $this->_tisFile = null;
        $this->_frqFile = null;
        $this->_prxFile = null;
    }

    return $this->_lastTerm;
}
|
||||
|
||||
/**
 * Close terms stream
 *
 * Drops the file handles, the current term state and the document map.
 * Should be used for resources clean up if stream is not read up to the end
 */
public function closeTermsStream()
{
    // File handles
    $this->_tisFile = $this->_frqFile = $this->_prxFile = null;

    // Current term state
    $this->_lastTerm = $this->_lastTermInfo = $this->_lastTermPositions = null;

    $this->_docMap = null;
}
|
||||
|
||||
|
||||
/**
 * Returns term in current position
 *
 * The value is null once the terms stream has been closed or read to the end.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    return $this->_lastTerm;
}
|
||||
|
||||
|
||||
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * The value is null when no positions are loaded (for example after the
 * terms stream has been closed).
 *
 * @return array|null
 */
public function currentTermPositions()
{
    return $this->_lastTermPositions;
}
|
||||
}
|
||||
|
271
thirdparty/Zend/Search/Lucene/Index/SegmentMerger.php
vendored
Normal file
271
thirdparty/Zend/Search/Lucene/Index/SegmentMerger.php
vendored
Normal file
@ -0,0 +1,271 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: SegmentMerger.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
|
||||
/**
 * Merges a set of source segments into one new segment.
 *
 * Sources are registered with addSource(); merge() then copies field infos,
 * norms, stored fields and terms of every source into the target segment writer,
 * leaving out documents which are marked as deleted.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Target segment writer
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of docs in a new segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * A set of segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Flag to signal, that merge is already done
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();


    /**
     * Object constructor.
     *
     * Creates new segment merger with $directory as target to merge segments into
     * and $name as a name of new segment
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
        require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Add segment to a collection of segments to be merged
     *
     * A segment added twice under the same name replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Runs all merge stages and returns whatever the target writer's close()
     * produces (the info object of the newly created segment).
     * May be performed only once per merger object.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        $sourceCount = count($this->_segmentInfos);
        if ($sourceCount < 1) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . $sourceCount
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information
     *
     * Registers every field of every source segment with the writer and records
     * the mapping from source field number to target field number.
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $sourceName => $source) {
            foreach ($source->getFieldInfos() as $sourceField) {
                $targetNumber = $this->_writer->addFieldInfo($sourceField);
                $this->_fieldsMap[$sourceName][$sourceField->number] = $targetNumber;
            }
        }
    }

    /**
     * Merge field's normalization factors
     *
     * For each indexed field the norm vectors of all source segments are passed
     * to the writer; entries of deleted documents are dropped first.
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            foreach ($this->_segmentInfos as $segmentInfo) {
                if (!$segmentInfo->hasDeletions()) {
                    // Nothing to filter out, pass the norm vector through as is
                    $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                    continue;
                }

                // Keep only the norm entries of documents which are not deleted
                $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                $norm    = '';
                $docs    = $segmentInfo->count();
                for ($docNum = 0; $docNum < $docs; $docNum++) {
                    if (!$segmentInfo->isDeleted($docNum)) {
                        $norm .= $srcNorm[$docNum];
                    }
                }
                $this->_writer->addNorm($fieldInfo->name, $norm);
            }
        }
    }

    /**
     * Merge stored fields
     *
     * Reads the stored fields of every document from the '.fdt' data of each
     * source segment and passes those of non-deleted documents to the writer.
     * Deleted documents are still read, since the data is consumed sequentially.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');

            for ($docNum = 0; $docNum < $segmentInfo->count(); $docNum++) {
                $fieldCount   = $fdtFile->readVInt();
                $storedFields = array();

                for ($fieldIdx = 0; $fieldIdx < $fieldCount; $fieldIdx++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $segmentInfo->getField($fieldNum);

                    if ($bits & 2) { // Binary data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readBinary(),
                                                              '',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1,
                                                              true);
                    } else {         // Text data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readString(),
                                                              'UTF-8',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1 );
                    }
                }

                if ($segmentInfo->isDeleted($docNum)) {
                    continue;
                }

                $this->_docCount++;
                $this->_writer->addStoredFields($storedFields);
            }
        }
    }


    /**
     * Merge terms
     *
     * Source term streams are combined through a priority queue; positions of
     * equal terms coming from different segments are united before the term is
     * handed over to the writer.
     */
    private function _mergeTerms()
    {
        /** Zend_Search_Lucene_Index_TermsPriorityQueue */
        require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

        $segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        // Each segment gets the start id returned for the previous one
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array();
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Merge positions array
            $termDocs += $segmentInfo->currentTermPositions();

            $termIsComplete = $segmentInfoQueue->top() === null  ||
                              $segmentInfoQueue->top()->currentTerm()->key() !=
                                          $segmentInfo->currentTerm()->key();

            if ($termIsComplete) {
                // We got new term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                }
                $termDocs = array();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}
|
634
thirdparty/Zend/Search/Lucene/Index/SegmentWriter.php
vendored
Normal file
634
thirdparty/Zend/Search/Lucene/Index/SegmentWriter.php
vendored
Normal file
@ -0,0 +1,634 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: SegmentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_FieldInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Index_SegmentWriter
|
||||
{
|
||||
/**
 * Expert: The fraction of terms in the "dictionary" which should be stored
 * in RAM.  Smaller values use more memory, but make searching slightly
 * faster, while larger values use less memory and make searching slightly
 * slower.  Searching is typically not dominated by dictionary lookup, so
 * tweaking this is rarely useful.
 *
 * @var integer
 */
public static $indexInterval = 128;

/**
 * Expert: The fraction of TermDocs entries stored in skip tables.
 * Larger values result in smaller indexes, greater acceleration, but fewer
 * accelerable cases, while smaller values result in bigger indexes,
 * less acceleration and more accelerable cases.
 * More detailed experiments would be useful here.
 *
 * 0x7FFFFFFF indicates that we don't use skip data
 *
 * Note: not used in current implementation
 *
 * @var integer
 */
public static $skipInterval = 0x7FFFFFFF;

/**
 * Expert: The maximum number of skip levels. Smaller values result in
 * slightly smaller indexes, but slower skipping in big posting lists.
 *
 * 0 indicates that we don't use skip data
 *
 * Note: not used in current implementation
 *
 * @var integer
 */
public static $maxSkipLevels = 0;

/**
 * Number of docs in a segment
 *
 * @var integer
 */
protected $_docCount = 0;

/**
 * Segment name
 *
 * @var string
 */
protected $_name;

/**
 * File system adapter.
 *
 * @var Zend_Search_Lucene_Storage_Directory
 */
protected $_directory;

/**
 * List of the index files (file names).
 * Used for automatic compound file generation
 *
 * @var array
 */
protected $_files = array();

/**
 * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment,
 * keyed by field name
 *
 * @var array
 */
protected $_fields = array();

/**
 * Normalization factors.
 * An array fieldName => normVector
 * normVector is a binary string.
 * Each byte corresponds to an indexed document in a segment and
 * encodes normalization factor (float value, encoded by
 * Zend_Search_Lucene_Search_Similarity::encodeNorm())
 *
 * @var array
 */
protected $_norms = array();


/**
 * '.fdx'  file - Stored Fields, the field index.
 *
 * @var Zend_Search_Lucene_Storage_File
 */
protected $_fdxFile = null;

/**
 * '.fdt'  file - Stored Fields, the field data.
 *
 * @var Zend_Search_Lucene_Storage_File
 */
protected $_fdtFile = null;
|
||||
|
||||
|
||||
/**
 * Object constructor.
 *
 * Remembers the target directory and the segment name; no files are created here.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string $name
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
    $this->_name      = $name;
    $this->_directory = $directory;
}
|
||||
|
||||
|
||||
/**
 * Add field to the segment
 *
 * A field which is not known yet gets the next free field number. For a known
 * field the isIndexed and storeTermVector flags are OR-ed with those of $field.
 *
 * Returns actual field number
 *
 * @param Zend_Search_Lucene_Field $field
 * @return integer
 */
public function addField(Zend_Search_Lucene_Field $field)
{
    if (isset($this->_fields[$field->name])) {
        // Known field: widen its flags and report the number assigned earlier
        $known = $this->_fields[$field->name];
        $known->isIndexed       |= $field->isIndexed;
        $known->storeTermVector |= $field->storeTermVector;

        return $known->number;
    }

    // New field: numbers are assigned sequentially in registration order
    $fieldNumber = count($this->_fields);
    $this->_fields[$field->name] =
                        new Zend_Search_Lucene_Index_FieldInfo($field->name,
                                                               $field->isIndexed,
                                                               $fieldNumber,
                                                               $field->storeTermVector);

    return $fieldNumber;
}
|
||||
|
||||
/**
 * Add fieldInfo to the segment
 *
 * A field which is not known yet gets the next free field number (the number
 * stored in $fieldInfo is not reused). For a known field the isIndexed and
 * storeTermVector flags are OR-ed with those of $fieldInfo.
 *
 * Returns actual field number
 *
 * @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo
 * @return integer
 */
public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo)
{
    if (isset($this->_fields[$fieldInfo->name])) {
        // Known field: widen its flags and report the number assigned earlier
        $known = $this->_fields[$fieldInfo->name];
        $known->isIndexed       |= $fieldInfo->isIndexed;
        $known->storeTermVector |= $fieldInfo->storeTermVector;

        return $known->number;
    }

    // New field: numbers are assigned sequentially in registration order
    $fieldNumber = count($this->_fields);
    $this->_fields[$fieldInfo->name] =
                        new Zend_Search_Lucene_Index_FieldInfo($fieldInfo->name,
                                                               $fieldInfo->isIndexed,
                                                               $fieldNumber,
                                                               $fieldInfo->storeTermVector);

    return $fieldNumber;
}
|
||||
|
||||
/**
 * Returns array of FieldInfo objects.
 *
 * The array is keyed by field name.
 *
 * @return array
 */
public function getFieldInfos()
{
    return $this->_fields;
}
|
||||
|
||||
/**
 * Add stored fields information
 *
 * Appends one document to the stored fields files: the current '.fdt' position
 * goes to '.fdx', the field data itself goes to '.fdt'. Both files are created
 * on the first call. Increments the segment's document counter.
 *
 * @param array $storedFields array of Zend_Search_Lucene_Field objects
 */
public function addStoredFields($storedFields)
{
    if (!isset($this->_fdxFile)) {
        // First document: create the stored fields files and register them
        $fdxName = $this->_name . '.fdx';
        $fdtName = $this->_name . '.fdt';

        $this->_fdxFile = $this->_directory->createFile($fdxName);
        $this->_fdtFile = $this->_directory->createFile($fdtName);

        $this->_files[] = $fdxName;
        $this->_files[] = $fdtName;
    }

    // Index entry points to the start of this document's data
    $this->_fdxFile->writeLong($this->_fdtFile->tell());
    $this->_fdtFile->writeVInt(count($storedFields));

    foreach ($storedFields as $storedField) {
        $this->_fdtFile->writeVInt($this->_fields[$storedField->name]->number);

        $flags = 0x00; /* 0x04 - third bit, compressed (ZLIB) */
        if ($storedField->isTokenized) {
            $flags |= 0x01;
        }
        if ($storedField->isBinary) {
            $flags |= 0x02;
        }
        $this->_fdtFile->writeByte($flags);

        if ($storedField->isBinary) {
            // Binary value: explicit byte length followed by the raw bytes
            $this->_fdtFile->writeVInt(strlen($storedField->value));
            $this->_fdtFile->writeBytes($storedField->value);
        } else {
            $this->_fdtFile->writeString($storedField->getUtf8Value());
        }
    }

    $this->_docCount++;
}
|
||||
|
||||
/**
 * Returns the total number of documents in this segment.
 *
 * This is the counter maintained by addStoredFields().
 *
 * @return integer
 */
public function count()
{
    return $this->_docCount;
}
|
||||
|
||||
/**
 * Return segment name
 *
 * This is the name passed to the constructor.
 *
 * @return string
 */
public function getName()
{
    return $this->_name;
}
|
||||
|
||||
/**
 * Dump Field Info (.fnm) segment file
 *
 * Also writes the norms ('.nrm') file: a 'NRM' header, a format byte and then
 * the norm vector of every indexed field, in field order.
 * Both file names are added to the list of segment files.
 */
protected function _dumpFNM()
{
    $fnmName = $this->_name . '.fnm';
    $nrmName = $this->_name . '.nrm';

    $fnmFile = $this->_directory->createFile($fnmName);
    $fnmFile->writeVInt(count($this->_fields));

    $nrmFile = $this->_directory->createFile($nrmName);
    // Write header
    $nrmFile->writeBytes('NRM');
    // Write format specifier
    $nrmFile->writeByte((int)0xFF);

    foreach ($this->_fields as $fieldInfo) {
        $fieldBits = 0x00;
        if ($fieldInfo->isIndexed) {
            $fieldBits |= 0x01;
        }
        if ($fieldInfo->storeTermVector) {
            $fieldBits |= 0x02;
        }
        // not supported yet: 0x04 /* term positions are stored with the term vectors */
        // not supported yet: 0x08 /* term offsets are stored with the term vectors */

        $fnmFile->writeString($fieldInfo->name);
        $fnmFile->writeByte($fieldBits);

        if ($fieldInfo->isIndexed) {
            // Norms of all indexed fields go into the single '.nrm' file
            // (separate per-field norm files of pre-2.1 indexes are not written)
            $nrmFile->writeBytes($this->_norms[$fieldInfo->name]);
        }
    }

    $this->_files[] = $fnmName;
    $this->_files[] = $nrmName;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Term Dictionary file ('.tis')
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_tisFile = null;

/**
 * Term Dictionary index file ('.tii')
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_tiiFile = null;

/**
 * Frequencies file ('.frq')
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_frqFile = null;

/**
 * Positions file ('.prx')
 *
 * @var Zend_Search_Lucene_Storage_File
 */
private $_prxFile = null;

/**
 * Number of written terms
 *
 * @var integer
 */
private $_termCount;


/**
 * Last saved term
 *
 * @var Zend_Search_Lucene_Index_Term
 */
private $_prevTerm;

/**
 * Last saved term info
 *
 * @var Zend_Search_Lucene_Index_TermInfo
 */
private $_prevTermInfo;

/**
 * Last saved index term
 *
 * @var Zend_Search_Lucene_Index_Term
 */
private $_prevIndexTerm;

/**
 * Last saved index term info
 *
 * @var Zend_Search_Lucene_Index_TermInfo
 */
private $_prevIndexTermInfo;

/**
 * Last term dictionary file position
 *
 * @var integer
 */
private $_lastIndexPosition;
|
||||
|
||||
/**
 * Create dictionary, frequency and positions files and write necessary headers
 *
 * The '.tis' and '.tii' files start with the same header; the terms count in it
 * is written as a placeholder here. The '.tii' file additionally receives an
 * empty start entry. Term writing state is reset afterwards.
 */
public function initializeDictionaryFiles()
{
    $baseName = $this->_name;

    // Term dictionary
    $this->_tisFile = $this->_directory->createFile($baseName . '.tis');
    $this->_tisFile->writeInt((int)0xFFFFFFFD);
    $this->_tisFile->writeLong(0 /* dummy data for terms count */);
    $this->_tisFile->writeInt(self::$indexInterval);
    $this->_tisFile->writeInt(self::$skipInterval);
    $this->_tisFile->writeInt(self::$maxSkipLevels);

    // Term dictionary index
    $this->_tiiFile = $this->_directory->createFile($baseName . '.tii');
    $this->_tiiFile->writeInt((int)0xFFFFFFFD);
    $this->_tiiFile->writeLong(0 /* dummy data for terms count */);
    $this->_tiiFile->writeInt(self::$indexInterval);
    $this->_tiiFile->writeInt(self::$skipInterval);
    $this->_tiiFile->writeInt(self::$maxSkipLevels);

    /** Dump dictionary header */
    $this->_tiiFile->writeVInt(0);                    // prefix length
    $this->_tiiFile->writeString('');                 // suffix
    $this->_tiiFile->writeInt((int)0xFFFFFFFF);       // field number
    $this->_tiiFile->writeByte((int)0x0F);
    $this->_tiiFile->writeVInt(0);                    // DocFreq
    $this->_tiiFile->writeVInt(0);                    // FreqDelta
    $this->_tiiFile->writeVInt(0);                    // ProxDelta
    $this->_tiiFile->writeVInt(24);                   // IndexDelta

    // Frequencies and positions
    $this->_frqFile = $this->_directory->createFile($baseName . '.frq');
    $this->_prxFile = $this->_directory->createFile($baseName . '.prx');

    foreach (array('.tis', '.tii', '.frq', '.prx') as $extension) {
        $this->_files[] = $baseName . $extension;
    }

    // Nothing has been written yet; 24 matches the IndexDelta of the start entry above
    $this->_prevTerm          = null;
    $this->_prevTermInfo      = null;
    $this->_prevIndexTerm     = null;
    $this->_prevIndexTermInfo = null;
    $this->_lastIndexPosition = 24;
    $this->_termCount         = 0;
}
|
||||
|
||||
/**
 * Add term
 *
 * Term positions is an array( docId => array(pos1, pos2, pos3, ...), ... )
 *
 * Writes document/frequency data to the frequencies file and position deltas
 * to the positions file, then dumps the term into the dictionary. Every
 * $indexInterval-th term is also dumped into the dictionary index.
 * Document id deltas are computed in iteration order of $termDocs.
 *
 * @param Zend_Search_Lucene_Index_Term $termEntry
 * @param array $termDocs
 */
public function addTerm($termEntry, $termDocs)
{
    $freqPointer = $this->_frqFile->tell();
    $proxPointer = $this->_prxFile->tell();

    $lastDocId = 0;
    foreach ($termDocs as $docId => $termPositions) {
        // Doubled doc id delta; the lowest bit flags "frequency is one"
        $docDelta  = ($docId - $lastDocId)*2;
        $lastDocId = $docId;

        $occurrences = count($termPositions);
        if ($occurrences > 1) {
            $this->_frqFile->writeVInt($docDelta);
            $this->_frqFile->writeVInt($occurrences);
        } else {
            $this->_frqFile->writeVInt($docDelta + 1);
        }

        // Positions are written as deltas from the previous position
        $lastPosition = 0;
        foreach ($termPositions as $position) {
            $this->_prxFile->writeVInt($position - $lastPosition);
            $lastPosition = $position;
        }
    }

    $docFreq = count($termDocs);

    /**
     * @todo Write Skip Data to a freq file.
     * It's not used now, but make index more optimal
     */
    $skipOffset = ($docFreq >= self::$skipInterval) ? $this->_frqFile->tell() - $freqPointer : 0;

    $term     = new Zend_Search_Lucene_Index_Term($termEntry->text,
                                                  $this->_fields[$termEntry->field]->number);
    $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq,
                                                      $freqPointer, $proxPointer, $skipOffset);

    $this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo);

    $isIndexedTerm = (($this->_termCount + 1) % self::$indexInterval == 0);
    if ($isIndexedTerm) {
        $this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo);

        // Index entry also stores the dictionary file position as a delta
        $indexPosition = $this->_tisFile->tell();
        $this->_tiiFile->writeVInt($indexPosition - $this->_lastIndexPosition);
        $this->_lastIndexPosition = $indexPosition;
    }

    $this->_termCount++;
}
|
||||
|
||||
/**
 * Close dictionary
 *
 * Overwrites the dummy term counts written when the .tis and .tii files
 * were initialised with the real values.
 */
public function closeDictionaryFiles()
{
    $this->_tisFile->seek(4);
    $this->_tisFile->writeLong($this->_termCount);

    // Number of complete index intervals covered by the written terms
    $leftover     = $this->_termCount % self::$indexInterval;
    $indexedTerms = ($this->_termCount - $leftover)/self::$indexInterval;

    $this->_tiiFile->seek(4);
    // + 1 is used to count an additional special index entry (empty term at the start of the list)
    $this->_tiiFile->writeLong($indexedTerms + 1);
}
|
||||
|
||||
|
||||
/**
 * Dump Term Dictionary segment file entry.
 * Used to write entry to .tis or .tii files
 *
 * The term text is written as the number of leading UTF-8 characters shared
 * with the previous term of the same field, followed by the remaining suffix.
 * File pointers are written as deltas against the previous term info.
 * $prevTerm and $prevTermInfo are passed by reference and are replaced
 * with $term and $termInfo.
 *
 * @param Zend_Search_Lucene_Storage_File $dicFile
 * @param Zend_Search_Lucene_Index_Term $prevTerm
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                      &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                      &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    // Defaults cover the "no usable previous term" case: nothing shared, whole text is the suffix
    $sharedChars = 0;
    $suffix      = $term->text;

    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        // Count identical leading bytes
        $byteLimit = min(strlen($prevTerm->text), strlen($term->text));
        $sameBytes = 0;
        while ($sameBytes < $byteLimit &&
               $prevTerm->text[$sameBytes] == $term->text[$sameBytes]) {
            $sameBytes++;
        }

        // Shrink the byte match to whole UTF-8 characters
        $sharedBytes = 0;
        while ($sharedBytes < $sameBytes) {
            $leadByte = ord($term->text[$sharedBytes]);

            if (($leadByte & 0xC0) != 0xC0) {
                $charWidth = 1;
            } else if (!($leadByte & 0x20)) {
                $charWidth = 2;
            } else if (!($leadByte & 0x10)) {
                $charWidth = 3;
            } else {
                $charWidth = 4;
            }

            if ($sharedBytes + $charWidth > $sameBytes) {
                // char crosses matched bytes boundary, so it is not part of the prefix
                break;
            }

            $sharedChars++;
            $sharedBytes += $charWidth;
        }

        $suffix = substr($term->text, $sharedBytes);
    }

    // Prefix length (in characters) and suffix
    $dicFile->writeVInt($sharedChars);
    $dicFile->writeString($suffix);

    // Field number
    $dicFile->writeVInt($term->field);
    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    // FreqDelta and ProxDelta: absolute for the first entry, relative afterwards
    $freqDelta = $termInfo->freqPointer;
    $proxDelta = $termInfo->proxPointer;
    if (isset($prevTermInfo)) {
        $freqDelta = $termInfo->freqPointer - $prevTermInfo->freqPointer;
        $proxDelta = $termInfo->proxPointer - $prevTermInfo->proxPointer;
    }
    $dicFile->writeVInt($freqDelta);
    $dicFile->writeVInt($proxDelta);

    // SkipOffset is written only when it is not 0
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
|
||||
|
||||
|
||||
/**
 * Generate compound index file
 *
 * Writes "<segment name>.cfs": the number of files, then for every file in
 * $this->_files a data offset and the file name, then the content of each
 * file. Each source file is deleted from the directory after it is copied.
 */
protected function _generateCFS()
{
    $cfsFile = $this->_directory->createFile($this->_name . '.cfs');
    $cfsFile->writeVInt(count($this->_files));

    // First pass: write the file table with dummy offsets and remember where each offset lives
    $offsetSlots = array();
    foreach ($this->_files as $segmentFile) {
        $offsetSlots[$segmentFile] = $cfsFile->tell();
        $cfsFile->writeLong(0); // write dummy data
        $cfsFile->writeString($segmentFile);
    }

    // Second pass: patch the real offset and append the file content
    foreach ($this->_files as $segmentFile) {
        $dataStart = $cfsFile->tell();

        $cfsFile->seek($offsetSlots[$segmentFile]);
        $cfsFile->writeLong($dataStart);
        // Seek back to the end of file
        $cfsFile->seek($dataStart);

        $sourceFile = $this->_directory->getFileObject($segmentFile);

        // Copy in chunks of at most 128Kb
        for ($remaining = $this->_directory->fileLength($segmentFile);
             $remaining > 0;
             $remaining -= strlen($chunk)) {
            $chunk = $sourceFile->readBytes(min($remaining, 131072 /*128Kb*/));
            $cfsFile->writeBytes($chunk);
        }

        $this->_directory->deleteFile($segmentFile);
    }
}
|
||||
|
||||
|
||||
/**
 * Close segment, write it to disk and return segment info
 *
 * Must be implemented by the concrete segment writers.
 *
 * @return Zend_Search_Lucene_Index_SegmentInfo
 */
abstract public function close();
|
||||
}
|
||||
|
230
thirdparty/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php
vendored
Normal file
230
thirdparty/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php
vendored
Normal file
@ -0,0 +1,230 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: DocumentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Term Dictionary
     * Array of the Zend_Search_Lucene_Index_Term objects, keyed by Zend_Search_Lucene_Index_Term::key().
     * The postings of each term are kept under the same key in $_termDocs.
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     *
     * array( termKey => array( docId => array(pos1, pos2, ...), ... ), ... )
     *
     * @var array
     */
    protected $_termDocs;

    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }


    /**
     * Adds a document to this segment.
     *
     * Indexed fields contribute terms (one per token for tokenized fields,
     * the whole UTF-8 value otherwise) and a norm byte; stored fields are
     * handed over to addStoredFields().
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception  if a field requests term vector storing
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        /** Zend_Search_Lucene_Search_Similarity */
        require_once 'Zend/Search/Lucene/Search/Similarity.php';

        $storedFields = array();
        $docNorms     = array();
        $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
            }

            if ($field->isIndexed) {
                if ($field->isTokenized) {
                    /** Zend_Search_Lucene_Analysis_Analyzer */
                    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

                    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
                    $analyzer->setInput($field->value, $field->encoding);

                    $position     = 0;
                    $tokenCounter = 0;
                    while (($token = $analyzer->nextToken()) !== null) {
                        $tokenCounter++;

                        $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                        $termKey = $this->_registerTerm($term);

                        $position += $token->getPositionIncrement();
                        $this->_termDocs[$termKey][$this->_docCount][] = $position;
                    }

                    if ($tokenCounter == 0) {
                        // Field contains empty value. Treat it as non-indexed and non-tokenized
                        $field = clone($field);
                        $field->isIndexed = $field->isTokenized = false;
                    } else {
                        $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
                                                                                                       $tokenCounter)*
                                                                               $document->boost*
                                                                               $field->boost ));
                    }
                } else if (($fieldUtf8Value = $field->getUtf8Value()) == '') {
                    // Field contains empty value. Treat it as non-indexed and non-tokenized
                    $field = clone($field);
                    $field->isIndexed = $field->isTokenized = false;
                } else {
                    // Untokenized field: the whole value is a single term at position 0
                    $term    = new Zend_Search_Lucene_Index_Term($fieldUtf8Value, $field->name);
                    $termKey = $this->_registerTerm($term);

                    $this->_termDocs[$termKey][$this->_docCount][] = 0; // position

                    $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
                                                                           $document->boost*
                                                                           $field->boost ));
                }
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }

            $this->addField($field);
        }

        // Extend the norm vector of every indexed field by one byte for this document
        foreach ($this->_fields as $fieldName => $field) {
            if (!$field->isIndexed) {
                continue;
            }

            if (!isset($this->_norms[$fieldName])) {
                // Field is new to the segment: back-fill earlier documents with the zero-length norm
                $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                                                       $this->_docCount);
            }

            if (isset($docNorms[$fieldName])){
                $this->_norms[$fieldName] .= $docNorms[$fieldName];
            } else {
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
            }
        }

        $this->addStoredFields($storedFields);
    }


    /**
     * Make sure the term is present in the term dictionary and owns a
     * position list for the document currently being added.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return string  term key to be used with $_termDictionary and $_termDocs
     */
    protected function _registerTerm(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();

        if (!isset($this->_termDictionary[$termKey])) {
            // New term
            $this->_termDictionary[$termKey] = $term;

            $this->_termDocs[$termKey] = array();
            $this->_termDocs[$termKey][$this->_docCount] = array();
        } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
            // Existing term, but new term entry
            $this->_termDocs[$termKey][$this->_docCount] = array();
        }

        return $termKey;
    }


    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     *
     * Terms are written in the string order of their keys.
     */
    protected function _dumpDictionary()
    {
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }


    /**
     * Close segment, write it to disk and return segment info
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo  or null if the segment contains no documents
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();

        $this->_generateCFS();

        /** Zend_Search_Lucene_Index_SegmentInfo */
        require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

        return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                        $this->_name,
                                                        $this->_docCount,
                                                        -1,
                                                        null,
                                                        true,
                                                        true);
    }

}
|
||||
|
94
thirdparty/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php
vendored
Normal file
94
thirdparty/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: StreamWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentWriter_StreamWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);
    }


    /**
     * Create stored fields files (.fdx and .fdt), open them for write
     * and add them to the list of segment files
     */
    public function createStoredFieldsFiles()
    {
        $fdxName = $this->_name . '.fdx';
        $fdtName = $this->_name . '.fdt';

        $this->_fdxFile = $this->_directory->createFile($fdxName);
        $this->_fdtFile = $this->_directory->createFile($fdtName);

        $this->_files[] = $fdxName;
        $this->_files[] = $fdtName;
    }

    /**
     * Append a norm vector to the norms collected for the field
     *
     * @param string $fieldName
     * @param string $normVector
     */
    public function addNorm($fieldName, $normVector)
    {
        $this->_norms[$fieldName] = isset($this->_norms[$fieldName])
                                  ? $this->_norms[$fieldName] . $normVector
                                  : $normVector;
    }

    /**
     * Close segment, write it to disk and return segment info
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo  or null if the segment contains no documents
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_generateCFS();

        /** Zend_Search_Lucene_Index_SegmentInfo */
        require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

        $segmentInfo = new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                $this->_name,
                                                                $this->_docCount,
                                                                -1,
                                                                null,
                                                                true,
                                                                true);

        return $segmentInfo;
    }
}
|
||||
|
144
thirdparty/Zend/Search/Lucene/Index/Term.php
vendored
Normal file
144
thirdparty/Zend/Search/Lucene/Index/Term.php
vendored
Normal file
@ -0,0 +1,144 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Term represents a word from text. This is the unit of search. It is
|
||||
* composed of two elements, the text of the word, as a string, and the name of
|
||||
* the field that the text occurred in, an interned string.
|
||||
*
|
||||
* Note that terms may represent more than words from text fields, but also
|
||||
* things like dates, email addresses, urls, etc.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending from context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;


    /**
     * Object constructor
     *
     * @param string $text   term value
     * @param mixed  $field  field name or number; when null, the value of
     *                       Zend_Search_Lucene::getDefaultSearchField() is used
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }


    /**
     * Returns term key
     *
     * The key is the field and the text joined by a zero byte.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns at most $length leading UTF-8 characters of $str. A trailing
     * character whose declared byte length runs past the end of the string
     * is not included.
     *
     * @param string $str
     * @param integer $length  prefix length in characters
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        $totalBytes  = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $totalBytes  &&  $prefixChars < $length) {
            $charBytes = self::_charByteLength($str[$prefixBytes]);

            if ($prefixBytes + $charBytes > $totalBytes) {
                // wrong character
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counting stops at a trailing character whose declared byte length
     * runs past the end of the string.
     *
     * @param string $str
     * @return integer  number of characters
     */
    public static function getLength($str)
    {
        $totalBytes = strlen($str);
        $bytes      = 0;
        $chars      = 0;

        while ($bytes < $totalBytes) {
            $charBytes = self::_charByteLength($str[$bytes]);

            if ($bytes + $charBytes > $totalBytes) {
                // wrong character
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }

    /**
     * Get the number of bytes (1-4) announced by the first byte of a UTF-8 character
     *
     * A byte without both high bits set counts as a one byte character.
     *
     * @param string $leadByte  single byte
     * @return integer
     */
    private static function _charByteLength($leadByte)
    {
        $code = ord($leadByte);

        $charBytes = 1;
        if (($code & 0xC0) == 0xC0) {
            $charBytes++;
            if ($code & 0x20 ) {
                $charBytes++;
                if ($code & 0x10 ) {
                    $charBytes++;
                }
            }
        }

        return $charBytes;
    }
}
|
||||
|
80
thirdparty/Zend/Search/Lucene/Index/TermInfo.php
vendored
Normal file
80
thirdparty/Zend/Search/Lucene/Index/TermInfo.php
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: TermInfo.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Zend_Search_Lucene_Index_TermInfo represents a record of information stored for a term.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_TermInfo
{
    /**
     * The number of documents which contain the term.
     *
     * @var integer
     */
    public $docFreq;

    /**
     * Data offset in a Frequencies file.
     *
     * @var integer
     */
    public $freqPointer;

    /**
     * Data offset in a Positions file.
     *
     * @var integer
     */
    public $proxPointer;

    /**
     * SkipData offset in a Frequencies file.
     *
     * @var integer
     */
    public $skipOffset;

    /**
     * Term offset of the _next_ term in a TermDictionary file.
     * Used only for Term Index
     *
     * @var integer
     */
    public $indexPointer;

    /**
     * Object constructor
     *
     * Stores every argument in the property of the same name.
     *
     * @param integer $docFreq
     * @param integer $freqPointer
     * @param integer $proxPointer
     * @param integer $skipOffset
     * @param integer $indexPointer
     */
    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
    {
        $this->indexPointer = $indexPointer;
        $this->skipOffset   = $skipOffset;
        $this->proxPointer  = $proxPointer;
        $this->freqPointer  = $freqPointer;
        $this->docFreq      = $docFreq;
    }
}
|
||||
|
49
thirdparty/Zend/Search/Lucene/Index/TermsPriorityQueue.php
vendored
Normal file
49
thirdparty/Zend/Search/Lucene/Index/TermsPriorityQueue.php
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: TermsPriorityQueue.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_PriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/PriorityQueue.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_TermsPriorityQueue extends Zend_Search_Lucene_PriorityQueue
{
    /**
     * Compare elements
     *
     * Terms streams are ordered by the key of their current term, compared with strcmp().
     * Returns true, if $termsStream1 is "less" than $termsStream2; false otherwise
     *
     * @param mixed $termsStream1
     * @param mixed $termsStream2
     * @return boolean
     */
    protected function _less($termsStream1, $termsStream2)
    {
        $firstKey  = $termsStream1->currentTerm()->key();
        $secondKey = $termsStream2->currentTerm()->key();

        return strcmp($firstKey, $secondKey) < 0;
    }

}
|
66
thirdparty/Zend/Search/Lucene/Index/TermsStream/Interface.php
vendored
Normal file
66
thirdparty/Zend/Search/Lucene/Index/TermsStream/Interface.php
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Interface.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
interface Zend_Search_Lucene_Index_TermsStream_Interface
{
    /**
     * Reset terms stream.
     */
    public function resetTermsStream();

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix);

    /**
     * Scans terms dictionary and returns next term
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm();

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm();

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end
     */
    public function closeTermsStream();
}
|
841
thirdparty/Zend/Search/Lucene/Index/Writer.php
vendored
Normal file
841
thirdparty/Zend/Search/Lucene/Index/Writer.php
vendored
Normal file
@ -0,0 +1,841 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Writer.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_LockManager */
|
||||
require_once 'Zend/Search/Lucene/LockManager.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_Writer
|
||||
{
|
||||
/**
 * @todo Implement Analyzer substitution
 * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
 *       temporary index files
 * @todo Directory lock processing
 */

/**
 * Buffered documents threshold.
 *
 * addDocument() commits the in-memory segment as soon as it holds at least
 * this many documents. It is also the initial block size used by the
 * auto-merge procedure.
 *
 * Default value is 10
 *
 * @var integer
 */
public $maxBufferedDocs = 10;

/**
 * Upper bound for the number of documents merged by addDocument().
 *
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @var integer
 */
public $maxMergeDocs = PHP_INT_MAX;

/**
 * Controls how often segment indices are merged by addDocument().
 *
 * Smaller values: less RAM is used while indexing and searches on
 * unoptimized indices are faster, but indexing is slower.
 *
 * Larger values: more RAM is used during indexing and searches on
 * unoptimized indices are slower, but indexing is faster.
 *
 * So larger values (> 10) suit batch index creation, smaller values (< 10)
 * suit indices that are maintained interactively.
 *
 * Default value is 10
 *
 * @var integer
 */
public $mergeFactor = 10;

/**
 * File system adapter (assigned by the constructor).
 *
 * @var Zend_Search_Lucene_Storage_Directory
 */
private $_directory;


/**
 * Number of changes made since the version stored in the segments file
 * was last updated.
 *
 * @var integer
 */
private $_versionUpdate = 0;

/**
 * Segments created by this writer that are not yet listed in the segments file.
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects, keyed by segment name.
 *
 * @var array
 */
private $_newSegments = array();

/**
 * Names of the segments to be dropped on the next commit
 *
 * @var array
 */
private $_segmentsToDelete = array();

/**
 * Segment that currently receives added documents (null when there is none)
 *
 * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
 */
private $_currentSegment;

/**
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
 *
 * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
 *
 * @var array Zend_Search_Lucene_Index_SegmentInfo
 */
private $_segmentInfos;

/**
 * Format version the index is written in
 *
 * @var integer
 */
private $_targetFormatVersion;

/**
 * Known index file extensions.
 *
 * Each extension is used both as key and as value, so membership can be
 * tested with isset() and the list can be iterated directly.
 *
 * @var array
 */
private static $_indexExtensions = array(
    '.cfs' => '.cfs',
    '.cfx' => '.cfx',
    '.fnm' => '.fnm',
    '.fdx' => '.fdx',
    '.fdt' => '.fdt',
    '.tis' => '.tis',
    '.tii' => '.tii',
    '.frq' => '.frq',
    '.prx' => '.prx',
    '.tvx' => '.tvx',
    '.tvd' => '.tvd',
    '.tvf' => '.tvf',
    '.del' => '.del',
    '.sti' => '.sti'
);
|
||||
|
||||
|
||||
/**
 * Write the files that make up an empty index into $directory.
 *
 * Generation 0 produces the pre-2.1 layout: existing index files are removed,
 * then a 'segments' file and a 'deletable' file are written.
 * Any other generation produces a 'segments.gen' file plus the segments file
 * named after that generation.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param integer $generation
 * @param integer $nameCount
 */
public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
{
    if ($generation != 0) {
        $genFile = $directory->createFile('segments.gen');
        $genFile->writeInt((int)0xFFFFFFFE);
        // The generation number is stored twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
        $segmentsFile->writeInt((int)0xFFFFFFFD);
        // version (initialized by current time)
        $segmentsFile->writeLong(round(microtime(true)));
        // name counter
        $segmentsFile->writeInt($nameCount);
        // segment counter
        $segmentsFile->writeInt(0);

        return;
    }

    // Pre-2.1 mode: start by wiping everything that looks like an index file
    foreach ($directory->fileList() as $fileName) {
        $isIndexFile = $fileName == 'deletable'
                    || $fileName == 'segments'
                    || isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)])
                    // <segment_name>.f<decimal_number> file names
                    || preg_match('/\.f\d+$/i', $fileName);

        if ($isIndexFile) {
            $directory->deleteFile($fileName);
        }
    }

    $segmentsFile = $directory->createFile('segments');
    $segmentsFile->writeInt((int)0xFFFFFFFF);
    // version (initialized by current time)
    $segmentsFile->writeLong(round(microtime(true)));
    // name counter
    $segmentsFile->writeInt($nameCount);
    // segment counter
    $segmentsFile->writeInt(0);

    $deletableFile = $directory->createFile('deletable');
    // counter
    $deletableFile->writeInt(0);
}
|
||||
|
||||
/**
 * Set up a writer for an existing index.
 *
 * $segmentInfos is taken by reference, so changes the writer makes to the
 * segments list are visible to the caller's array as well.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param array $segmentInfos
 * @param integer $targetFormatVersion
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
{
    $this->_targetFormatVersion = $targetFormatVersion;
    $this->_segmentInfos        = &$segmentInfos;
    $this->_directory           = $directory;
}
|
||||
|
||||
/**
 * Add a document to the index.
 *
 * The document goes into the current in-memory segment (created on demand).
 * Once that segment holds maxBufferedDocs documents it is committed, and
 * afterwards the auto-merge procedure gets a chance to run.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
    require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

    if (null === $this->_currentSegment) {
        $segmentName = $this->_newSegmentName();
        $this->_currentSegment =
            new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $segmentName);
    }
    $this->_currentSegment->addDocument($document);

    $bufferIsFull = $this->_currentSegment->count() >= $this->maxBufferedDocs;
    if ($bufferIsFull) {
        $this->commit();
    }

    $this->_maybeMergeSegments();

    $this->_versionUpdate += 1;
}
|
||||
|
||||
|
||||
/**
 * Check if we have anything to merge
 *
 * Walks the segments in ascending size order and groups them into blocks:
 * the first block collects segments smaller than maxBufferedDocs, each next
 * block has a threshold mergeFactor times larger. Reports true as soon as a
 * block has accumulated at least its threshold number of documents.
 *
 * Returns false when the threshold would exceed maxMergeDocs, or when it
 * cannot grow any further (mergeFactor <= 1 or a zero threshold), which would
 * otherwise keep the inner loop spinning forever.
 *
 * @return boolean
 */
private function _hasAnythingToMerge()
{
    $segmentSizes = array();
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        $segmentSizes[$segName] = $segmentInfo->count();
    }
    asort($segmentSizes, SORT_NUMERIC);

    $poolSize    = 0;
    $sizeToMerge = $this->maxBufferedDocs;
    foreach ($segmentSizes as $size) {
        // Check, if segment comes into a new merging block
        while ($size >= $sizeToMerge) {
            // Previous block is large enough to be merged
            if ($poolSize >= $sizeToMerge) {
                return true;
            }
            $poolSize = 0;

            $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
            if ($nextSizeToMerge <= $sizeToMerge) {
                // Threshold does not grow: no larger block can ever be reached
                return false;
            }
            $sizeToMerge = $nextSizeToMerge;

            if ($sizeToMerge > $this->maxMergeDocs) {
                return false;
            }
        }

        $poolSize += $size;
    }

    return $poolSize >= $sizeToMerge;
}
|
||||
|
||||
/**
 * Merge segments if necessary
 *
 * Does nothing when the optimization lock cannot be obtained. Otherwise the
 * lock is held for the whole procedure and released on every exit path,
 * including when an exception passes through.
 *
 * Segments are processed in ascending size order and grouped into blocks:
 * the first block collects segments smaller than maxBufferedDocs, each next
 * block has a threshold mergeFactor times larger. A block is merged once it
 * has accumulated at least its threshold number of documents. Processing
 * stops when the threshold would exceed maxMergeDocs or cannot grow any
 * further (mergeFactor <= 1 or a zero threshold).
 *
 * @throws Exception  whatever _updateSegments() or _mergeSegments() throws
 */
private function _maybeMergeSegments()
{
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return;
    }

    try {
        if ($this->_hasAnythingToMerge()) {
            // Update segments list to be sure all segments are not merged yet by another process
            //
            // Segment merging functionality is concentrated in this class and surrounded
            // by optimization lock obtaining/releasing.
            // _updateSegments() refreshes segments list from the latest index generation.
            // So only new segments can be added to the index while we are merging some already existing
            // segments.
            // Newly added segments will be also included into the index by the _updateSegments() call
            // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
            // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
            $this->_updateSegments();

            // Perform standard auto-optimization procedure
            $segmentSizes = array();
            foreach ($this->_segmentInfos as $segName => $segmentInfo) {
                $segmentSizes[$segName] = $segmentInfo->count();
            }
            asort($segmentSizes, SORT_NUMERIC);

            $mergePool    = array();
            $poolSize     = 0;
            $sizeToMerge  = $this->maxBufferedDocs;
            $limitReached = false;

            foreach ($segmentSizes as $segName => $size) {
                // Check, if segment comes into a new merging block
                while ($size >= $sizeToMerge) {
                    // Merge previous block if it's large enough
                    if ($poolSize >= $sizeToMerge) {
                        $this->_mergeSegments($mergePool);
                    }
                    $mergePool = array();
                    $poolSize  = 0;

                    $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
                    if ($nextSizeToMerge <= $sizeToMerge || $nextSizeToMerge > $this->maxMergeDocs) {
                        // Either the threshold cannot grow (the loop would never end)
                        // or the next block would exceed maxMergeDocs: stop here
                        $limitReached = true;
                        break 2;
                    }
                    $sizeToMerge = $nextSizeToMerge;
                }

                $mergePool[] = $this->_segmentInfos[$segName];
                $poolSize   += $size;
            }

            if (!$limitReached && $poolSize >= $sizeToMerge) {
                $this->_mergeSegments($mergePool);
            }
        }
    } catch (Exception $e) {
        // Do not leave the optimization lock behind when merging fails
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
}
|
||||
|
||||
/**
 * Merge specified segments
 *
 * $segments is an array of SegmentInfo objects
 *
 * The source segments are scheduled for deletion only after the merger has
 * finished. If merge() throws, the sources stay in the index and a later
 * commit does not drop them.
 *
 * @param array $segments
 */
private function _mergeSegments($segments)
{
    $newName = $this->_newSegmentName();

    /** Zend_Search_Lucene_Index_SegmentMerger */
    require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
    $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                         $newName);

    $mergedNames = array();
    foreach ($segments as $segmentInfo) {
        $merger->addSource($segmentInfo);
        $mergedNames[] = $segmentInfo->getName();
    }

    $newSegment = $merger->merge();

    // Merge succeeded: from now on the sources are obsolete
    foreach ($mergedNames as $segmentName) {
        $this->_segmentsToDelete[$segmentName] = $segmentName;
    }

    if ($newSegment !== null) {
        $this->_newSegments[$newSegment->getName()] = $newSegment;
    }

    $this->commit();
}
|
||||
|
||||
/**
 * Update segments file by adding current segment to a list
 *
 * Under the index write lock this method:
 *  - writes pending changes of all known segments;
 *  - creates the segments file of the next generation, copying the entries of
 *    the current one (except segments scheduled for deletion) and appending
 *    the segments created by this writer;
 *  - updates 'segments.gen';
 *  - removes files which are not used any more if the read lock can be
 *    escalated, otherwise only purges the files of the deleted segments from
 *    the directory object;
 *  - synchronises $this->_segmentInfos with the segments list just written.
 *
 * If building the new segments file fails, the previous generation number is
 * written back into 'segments.gen', the write lock is released and the error
 * is rethrown as Zend_Search_Lucene_Exception.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Get an exclusive index lock
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    // Write down changes for the segments
    foreach ($this->_segmentInfos as $segInfo) {
        $segInfo->writeChanges();
    }


    $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
    $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
    $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

    try {
        $genFile = $this->_directory->getFileObject('segments.gen', false);
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
            $genFile = $this->_directory->createFile('segments.gen');
        } else {
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    $genFile->writeInt((int)0xFFFFFFFE);
    // Write generation (first copy)
    $genFile->writeLong($generation);

    try {
        // Write format marker
        if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
            $newSegmentFile->writeInt((int)0xFFFFFFFD);
        } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
            $newSegmentFile->writeInt((int)0xFFFFFFFC);
        }

        // Read src file format identifier
        $format = $segmentsFile->readInt();
        if ($format == (int)0xFFFFFFFF) {
            $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
        } else if ($format == (int)0xFFFFFFFD) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
        } else if ($format == (int)0xFFFFFFFC) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
        } else {
            throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
        }

        $version = $segmentsFile->readLong() + $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeLong($version);

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter)
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                // pre-2.1 index format
                $delGen            = 0;
                $hasSingleNormFile = false;
                $numField          = (int)0xFFFFFFFF;
                $isCompoundByte    = 0;
                $docStoreOptions   = null;
            } else {
                $delGen = $segmentsFile->readLong();

                if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                    $docStoreOffset = $segmentsFile->readInt();

                    if ($docStoreOffset != (int)0xFFFFFFFF) {
                        $docStoreSegment        = $segmentsFile->readString();
                        $docStoreIsCompoundFile = $segmentsFile->readByte();

                        $docStoreOptions = array('offset'     => $docStoreOffset,
                                                 'segment'    => $docStoreSegment,
                                                 'isCompound' => ($docStoreIsCompoundFile == 1));
                    } else {
                        $docStoreOptions = null;
                    }
                } else {
                    $docStoreOptions = null;
                }

                $hasSingleNormFile = $segmentsFile->readByte();
                $numField          = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int)0xFFFFFFFF) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }
                $isCompoundByte    = $segmentsFile->readByte();
            }

            // $_segmentsToDelete is keyed by segment name (see _mergeSegments())
            if (!isset($this->_segmentsToDelete[$segName])) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    if ($isCompoundByte == 0xFF) {
                        // The segment is not a compound file
                        $isCompound = false;
                    } else if ($isCompoundByte == 0x01) {
                        // The segment is a compound file
                        $isCompound = true;
                    } else {
                        // 0x00 (status is unknown) or an unexpected marker:
                        // never reuse the value left over from the previous segment
                        $isCompound = null;
                    }

                    /** Zend_Search_Lucene_Index_SegmentInfo */
                    require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                    $this->_segmentInfos[$segName] =
                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                         $segName,
                                                                         $segSize,
                                                                         $delGen,
                                                                         $docStoreOptions,
                                                                         $hasSingleNormFile,
                                                                         $isCompound);
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeLong($delGen);
                if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                    if ($docStoreOptions !== null) {
                        $newSegmentFile->writeInt($docStoreOffset);
                        $newSegmentFile->writeString($docStoreSegment);
                        $newSegmentFile->writeByte($docStoreIsCompoundFile);
                    } else {
                        // Set DocStoreOffset to -1
                        $newSegmentFile->writeInt((int)0xFFFFFFFF);
                    }
                } else if ($docStoreOptions !== null) {
                    // The write lock is released by the catch block below, after
                    // the previous generation has been restored in 'segments.gen'
                    throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                }

                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int)0xFFFFFFFF) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompoundByte);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet),
            // written as two 32-bit halves
            $newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
            if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                // docStoreOffset: -1 (segment doesn't use shared doc store)
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
            }
            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
            // NumField
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();
    } catch (Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation); $genFile->writeLong($generation);

        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Throw the exception
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);


    // Check if another update or read process is not running now
    // If yes, skip clean-up procedure
    if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
        /**
         * Clean-up directory
         */
        $filesToDelete = array();
        $filesTypes    = array();
        $filesNumbers  = array();

        // list of .del files of currently used segments
        // each segment can have several generations of .del files
        // only last should not be deleted
        $delFiles = array();

        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable') {
                // 'deletable' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                $filesNumbers[]  = 0;
            } else if ($file == 'segments') {
                // 'segments' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                $filesNumbers[]  = 0;
            } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                // 'segments_xxx' file
                // Check if it's not a just created generation file
                if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 2; // first group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>.f<decimal_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                } else {
                    $segmentNumber = (int)base_convert($matches[2], 36, 10);
                    $delGeneration = (int)base_convert($matches[4], 36, 10);
                    if (!isset($delFiles[$segmentNumber])) {
                        $delFiles[$segmentNumber] = array();
                    }
                    $delFiles[$segmentNumber][$delGeneration] = $file;
                }
            } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                // one of per segment files ('<segment_name>.<ext>')
                $segmentName = substr($file, 0, strlen($file) - 4);
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$segmentName])  &&
                    ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                }
            }
        }

        $maxGenNumber = 0;
        // process .del files of currently used segments
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            ksort($delFiles[$segmentNumber], SORT_NUMERIC);
            array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

            end($delFiles[$segmentNumber]);
            $lastGenNumber = key($delFiles[$segmentNumber]);
            if ($lastGenNumber > $maxGenNumber) {
                $maxGenNumber = $lastGenNumber;
            }
        }
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            foreach ($segmentDelFiles as $delGeneration => $file) {
                $filesToDelete[] = $file;
                $filesTypes[]    = 4; // third group of files for deletions
                $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
            }
        }

        // Reorder files for deleting
        array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                        $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                        $filesToDelete, SORT_ASC, SORT_STRING);

        foreach ($filesToDelete as $file) {
            try {
                /** Skip shared docstore segments deleting */
                /** @todo Process '.cfx' files to check if them are already unused */
                if (substr($file, strlen($file)-4) != '.cfx') {
                    $this->_directory->deleteFile($file);
                }
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                    // That's not "file is under processing or already deleted" exception
                    // Pass it through
                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                }
            }
        }

        // Return read lock into the previous state
        Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
    } else {
        // Only release resources if another index reader is running now
        foreach ($this->_segmentsToDelete as $segName) {
            foreach (self::$_indexExtensions as $ext) {
                $this->_directory->purgeFile($segName . $ext);
            }
        }
    }

    // Clean-up _segmentsToDelete container
    $this->_segmentsToDelete = array();


    // Release index write lock
    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}
|
||||
|
||||
/**
 * Flush pending changes to the index.
 *
 * Closes the segment that is currently being filled (if any), queues the
 * resulting segment for inclusion and rewrites the segments file.
 */
public function commit()
{
    $openSegment = $this->_currentSegment;

    if (null !== $openSegment) {
        $closedSegment = $openSegment->close();
        if (null !== $closedSegment) {
            $this->_newSegments[$closedSegment->getName()] = $closedSegment;
        }
        $this->_currentSegment = null;
    }

    $this->_updateSegments();
}
|
||||
|
||||
|
||||
/**
|
||||
* Intended to merge the provided indexes into this index. Not implemented yet: the body is empty, so a call has no effect.
|
||||
*
|
||||
* @param array $readers
|
||||
* @return void
|
||||
*/
|
||||
public function addIndexes($readers)
|
||||
{
|
||||
/**
|
||||
* @todo implementation ($readers is currently ignored)
|
||||
*/
|
||||
}
|
||||
|
||||
/**
 * Merges all segments together into new one
 *
 * Returns true on success and false if another optimization or auto-optimization process
 * is running now
 *
 * The optimization lock is released on every exit path, including when an
 * exception passes through.
 *
 * @return boolean
 * @throws Exception  whatever _updateSegments() or _mergeSegments() throws
 */
public function optimize()
{
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return false;
    }

    try {
        // Update segments list to be sure all segments are not merged yet by another process
        //
        // Segment merging functionality is concentrated in this class and surrounded
        // by optimization lock obtaining/releasing.
        // _updateSegments() refreshes segments list from the latest index generation.
        // So only new segments can be added to the index while we are merging some already existing
        // segments.
        // Newly added segments will be also included into the index by the _updateSegments() call
        // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
        // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
        $this->_updateSegments();

        $this->_mergeSegments($this->_segmentInfos);
    } catch (Exception $e) {
        // Do not leave the optimization lock behind when merging fails
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

    return true;
}
|
||||
|
||||
/**
 * Get name for new segment
 *
 * Under the index write lock, reads the segment name counter from the current
 * segments file and stores the incremented value back. The returned name is
 * '_' followed by the counter value that was read, in base 36.
 *
 * The write lock is released on every exit path, including when an exception
 * passes through.
 *
 * @return string
 * @throws Exception  whatever reading or updating the segments file throws
 */
private function _newSegmentName()
{
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    try {
        $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentNameCounter = $segmentsFile->readInt();

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentsFile->writeInt($segmentNameCounter + 1);

        // Flush output to guarantee that wrong value will not be loaded between unlock and
        // return (which calls $segmentsFile destructor)
        $segmentsFile->flush();
    } catch (Exception $e) {
        // Do not leave the write lock behind when the counter cannot be updated
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    return '_' . base_convert($segmentNameCounter, 10, 36);
}
|
||||
|
||||
}
|
417
thirdparty/Zend/Search/Lucene/Interface.php
vendored
Normal file
417
thirdparty/Zend/Search/Lucene/Interface.php
vendored
Normal file
@ -0,0 +1,417 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Interface.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
|
||||
/** Classes used within Zend_Search_Lucene_Interface API */
|
||||
|
||||
/** Zend_Search_Lucene_Document */
|
||||
require_once 'Zend/Search/Lucene/Document.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_DocsFilter */
|
||||
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
interface Zend_Search_Lucene_Interface extends Zend_Search_Lucene_Index_TermsStream_Interface
|
||||
{
|
||||
/**
|
||||
* Get current generation number
|
||||
*
|
||||
* Returns generation number
|
||||
* 0 means pre-2.1 index format
|
||||
* -1 means there are no segments files.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Storage_Directory $directory
|
||||
* @return integer
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory);
|
||||
|
||||
/**
|
||||
* Get segments file name
|
||||
*
|
||||
* @param integer $generation
|
||||
* @return string
|
||||
*/
|
||||
public static function getSegmentFileName($generation);
|
||||
|
||||
/**
|
||||
* Get index format version
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getFormatVersion();
|
||||
|
||||
/**
|
||||
* Set index format version.
|
||||
* Index is converted to this format at the nearest update time
|
||||
*
|
||||
* @param int $formatVersion
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function setFormatVersion($formatVersion);
|
||||
|
||||
/**
|
||||
* Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
|
||||
*
|
||||
* @return Zend_Search_Lucene_Storage_Directory
|
||||
*/
|
||||
public function getDirectory();
|
||||
|
||||
/**
|
||||
* Returns the total number of documents in this index (including deleted documents).
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function count();
|
||||
|
||||
/**
|
||||
* Returns one greater than the largest possible document number.
|
||||
* This may be used to, e.g., determine how big to allocate a structure which will have
|
||||
* an element for every document number in an index.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function maxDoc();
|
||||
|
||||
/**
|
||||
* Returns the total number of non-deleted documents in this index.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function numDocs();
|
||||
|
||||
/**
|
||||
* Checks, that document is deleted
|
||||
*
|
||||
* @param integer $id
|
||||
* @return boolean
|
||||
* @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
|
||||
*/
|
||||
public function isDeleted($id);
|
||||
|
||||
/**
|
||||
* Set default search field.
|
||||
*
|
||||
* Null means, that search is performed through all fields by default
|
||||
*
|
||||
* Default value is null
|
||||
*
|
||||
* @param string $fieldName
|
||||
*/
|
||||
public static function setDefaultSearchField($fieldName);
|
||||
|
||||
/**
|
||||
* Get default search field.
|
||||
*
|
||||
* Null means, that search is performed through all fields by default
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function getDefaultSearchField();
|
||||
|
||||
/**
|
||||
* Set result set limit.
|
||||
*
|
||||
* 0 (default) means no limit
|
||||
*
|
||||
* @param integer $limit
|
||||
*/
|
||||
public static function setResultSetLimit($limit);
|
||||
|
||||
/**
|
||||
* Get result set limit.
|
||||
*
|
||||
* 0 means no limit
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public static function getResultSetLimit();
|
||||
|
||||
/**
|
||||
* Retrieve index maxBufferedDocs option
|
||||
*
|
||||
* maxBufferedDocs is a minimal number of documents required before
|
||||
* the buffered in-memory documents are written into a new Segment
|
||||
*
|
||||
* Default value is 10
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getMaxBufferedDocs();
|
||||
|
||||
/**
|
||||
* Set index maxBufferedDocs option
|
||||
*
|
||||
* maxBufferedDocs is a minimal number of documents required before
|
||||
* the buffered in-memory documents are written into a new Segment
|
||||
*
|
||||
* Default value is 10
|
||||
*
|
||||
* @param integer $maxBufferedDocs
|
||||
*/
|
||||
public function setMaxBufferedDocs($maxBufferedDocs);
|
||||
|
||||
/**
|
||||
* Retrieve index maxMergeDocs option
|
||||
*
|
||||
* maxMergeDocs is a largest number of documents ever merged by addDocument().
|
||||
* Small values (e.g., less than 10,000) are best for interactive indexing,
|
||||
* as this limits the length of pauses while indexing to a few seconds.
|
||||
* Larger values are best for batched indexing and speedier searches.
|
||||
*
|
||||
* Default value is PHP_INT_MAX
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getMaxMergeDocs();
|
||||
|
||||
/**
|
||||
* Set index maxMergeDocs option
|
||||
*
|
||||
* maxMergeDocs is a largest number of documents ever merged by addDocument().
|
||||
* Small values (e.g., less than 10,000) are best for interactive indexing,
|
||||
* as this limits the length of pauses while indexing to a few seconds.
|
||||
* Larger values are best for batched indexing and speedier searches.
|
||||
*
|
||||
* Default value is PHP_INT_MAX
|
||||
*
|
||||
* @param integer $maxMergeDocs
|
||||
*/
|
||||
public function setMaxMergeDocs($maxMergeDocs);
|
||||
|
||||
/**
|
||||
* Retrieve index mergeFactor option
|
||||
*
|
||||
* mergeFactor determines how often segment indices are merged by addDocument().
|
||||
* With smaller values, less RAM is used while indexing,
|
||||
* and searches on unoptimized indices are faster,
|
||||
* but indexing speed is slower.
|
||||
* With larger values, more RAM is used during indexing,
|
||||
* and while searches on unoptimized indices are slower,
|
||||
* indexing is faster.
|
||||
* Thus larger values (> 10) are best for batch index creation,
|
||||
* and smaller values (< 10) for indices that are interactively maintained.
|
||||
*
|
||||
* Default value is 10
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function getMergeFactor();
|
||||
|
||||
/**
|
||||
* Set index mergeFactor option
|
||||
*
|
||||
* mergeFactor determines how often segment indices are merged by addDocument().
|
||||
* With smaller values, less RAM is used while indexing,
|
||||
* and searches on unoptimized indices are faster,
|
||||
* but indexing speed is slower.
|
||||
* With larger values, more RAM is used during indexing,
|
||||
* and while searches on unoptimized indices are slower,
|
||||
* indexing is faster.
|
||||
* Thus larger values (> 10) are best for batch index creation,
|
||||
* and smaller values (< 10) for indices that are interactively maintained.
|
||||
*
|
||||
* Default value is 10
|
||||
*
|
||||
* @param integer $mergeFactor
|
||||
*/
|
||||
public function setMergeFactor($mergeFactor);
|
||||
|
||||
/**
|
||||
* Performs a query against the index and returns an array
|
||||
* of Zend_Search_Lucene_Search_QueryHit objects.
|
||||
* Input is a string or Zend_Search_Lucene_Search_Query.
|
||||
*
|
||||
* @param mixed $query
|
||||
* @return array Zend_Search_Lucene_Search_QueryHit
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function find($query);
|
||||
|
||||
/**
|
||||
* Returns a list of all unique field names that exist in this index.
|
||||
*
|
||||
* @param boolean $indexed
|
||||
* @return array
|
||||
*/
|
||||
public function getFieldNames($indexed = false);
|
||||
|
||||
/**
|
||||
* Returns a Zend_Search_Lucene_Document object for the document
|
||||
* number $id in this index.
|
||||
*
|
||||
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
|
||||
* @return Zend_Search_Lucene_Document
|
||||
*/
|
||||
public function getDocument($id);
|
||||
|
||||
/**
|
||||
* Returns true if index contains documents with specified term.
|
||||
*
|
||||
* Is used for query optimization.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @return boolean
|
||||
*/
|
||||
public function hasTerm(Zend_Search_Lucene_Index_Term $term);
|
||||
|
||||
/**
|
||||
* Returns IDs of all the documents containing term.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||||
* @return array
|
||||
*/
|
||||
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null);
|
||||
|
||||
/**
|
||||
* Returns documents filter for all documents containing term.
|
||||
*
|
||||
* It performs the same operation as termDocs, but return result as
|
||||
* Zend_Search_Lucene_Index_DocsFilter object
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||||
* @return Zend_Search_Lucene_Index_DocsFilter
|
||||
*/
|
||||
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null);
|
||||
|
||||
/**
|
||||
* Returns an array of all term freqs.
|
||||
* Return array structure: array( docId => freq, ...)
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||||
* @return array
|
||||
*/
|
||||
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null);
|
||||
|
||||
/**
|
||||
* Returns an array of all term positions in the documents.
|
||||
* Return array structure: array( docId => array( pos1, pos2, ...), ...)
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
|
||||
* @return array
|
||||
*/
|
||||
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null);
|
||||
|
||||
/**
|
||||
* Returns the number of documents in this index containing the $term.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @return integer
|
||||
*/
|
||||
public function docFreq(Zend_Search_Lucene_Index_Term $term);
|
||||
|
||||
/**
|
||||
* Retrieve similarity used by index reader
|
||||
*
|
||||
* @return Zend_Search_Lucene_Search_Similarity
|
||||
*/
|
||||
public function getSimilarity();
|
||||
|
||||
/**
|
||||
* Returns a normalization factor for "field, document" pair.
|
||||
*
|
||||
* @param integer $id
|
||||
* @param string $fieldName
|
||||
* @return float
|
||||
*/
|
||||
public function norm($id, $fieldName);
|
||||
|
||||
/**
|
||||
* Returns true if any documents have been deleted from this index.
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
public function hasDeletions();
|
||||
|
||||
/**
|
||||
* Deletes a document from the index.
|
||||
* $id is an internal document id
|
||||
*
|
||||
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function delete($id);
|
||||
|
||||
/**
|
||||
* Adds a document to this index.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Document $document
|
||||
*/
|
||||
public function addDocument(Zend_Search_Lucene_Document $document);
|
||||
|
||||
/**
|
||||
* Commit changes resulting from delete() or undeleteAll() operations.
|
||||
*/
|
||||
public function commit();
|
||||
|
||||
/**
|
||||
* Optimize index.
|
||||
*
|
||||
* Merges all segments into one
|
||||
*/
|
||||
public function optimize();
|
||||
|
||||
/**
|
||||
* Returns an array of all terms in this index.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function terms();
|
||||
|
||||
/**
|
||||
* Undeletes all documents currently marked as deleted in this index.
|
||||
*/
|
||||
public function undeleteAll();
|
||||
|
||||
|
||||
/**
|
||||
* Add reference to the index object
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public function addReference();
|
||||
|
||||
/**
|
||||
* Remove reference from the index object
|
||||
*
|
||||
* When reference count becomes zero, index is closed and resources are cleaned up
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
public function removeReference();
|
||||
}
|
236
thirdparty/Zend/Search/Lucene/LockManager.php
vendored
Normal file
236
thirdparty/Zend/Search/Lucene/LockManager.php
vendored
Normal file
@ -0,0 +1,236 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: LockManager.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Storage_Directory */
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory.php';
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/**
 * Utility class with static helpers for acquiring and releasing index lock files.
 *
 * Every helper takes the directory that holds the lock files and works on one of
 * the lock file names declared as class constants below.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_LockManager
{
    /**
     * Names of the files whose lock state represents each kind of index lock
     */
    const WRITE_LOCK_FILE                = 'write.lock.file';
    const READ_LOCK_FILE                 = 'read.lock.file';
    const READ_LOCK_PROCESSING_LOCK_FILE = 'read-lock-processing.lock.file';
    const OPTIMIZATION_LOCK_FILE         = 'optimization.lock.file';

    /**
     * Obtain the exclusive write lock on the index.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     * @return Zend_Search_Lucene_Storage_File
     * @throws Zend_Search_Lucene_Exception  If the exclusive lock request is refused
     */
    public static function obtainWriteLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $writeLock = $lockDirectory->createFile(self::WRITE_LOCK_FILE);

        if ($writeLock->lock(LOCK_EX)) {
            return $writeLock;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
    }

    /**
     * Release the exclusive write lock.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     */
    public static function releaseWriteLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $lockDirectory->getFileObject(self::WRITE_LOCK_FILE)->unlock();
    }

    /**
     * Obtain the exclusive "read escalation/de-escalation" lock.
     *
     * It guards the escalate/de-escalate sequence of the read lock. According to
     * the original authors this is needed on GFS (and potentially other) mounted
     * filesystems, where flock() semantics differ from a local filesystem:
     * conditional promotion of a shared lock to exclusive fails, lock releases are
     * honored but not processed immediately, and on demotion the exclusive lock is
     * released before the shared lock is set, which opens a window for another
     * process to gain an exclusive lock when it shouldn't be allowed to.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     * @return Zend_Search_Lucene_Storage_File
     * @throws Zend_Search_Lucene_Exception  If the exclusive lock request is refused
     */
    private static function _startReadLockProcessing(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $processingLock = $lockDirectory->createFile(self::READ_LOCK_PROCESSING_LOCK_FILE);

        if ($processingLock->lock(LOCK_EX)) {
            return $processingLock;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive lock for the read lock processing file');
    }

    /**
     * Release the exclusive "read escalation/de-escalation" lock.
     *
     * Counterpart of _startReadLockProcessing().
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     */
    private static function _stopReadLockProcessing(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $lockDirectory->getFileObject(self::READ_LOCK_PROCESSING_LOCK_FILE)->unlock();
    }

    /**
     * Obtain the shared read lock on the index.
     *
     * It doesn't block other read or update processes, but prevents the index
     * from premature cleaning-up.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     * @return Zend_Search_Lucene_Storage_File
     * @throws Zend_Search_Lucene_Exception  If the shared lock request is refused
     */
    public static function obtainReadLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $readLock = $lockDirectory->createFile(self::READ_LOCK_FILE);

        if ($readLock->lock(LOCK_SH)) {
            return $readLock;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Can\'t obtain shared reading index lock');
    }

    /**
     * Release the shared read lock.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     */
    public static function releaseReadLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $lockDirectory->getFileObject(self::READ_LOCK_FILE)->unlock();
    }

    /**
     * Escalate the read lock to exclusive level.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     * @return boolean  true if the exclusive lock was obtained, false if the
     *                  shared lock was restored instead
     */
    public static function escalateReadLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        self::_startReadLockProcessing($lockDirectory);

        $readLock = $lockDirectory->getFileObject(self::READ_LOCK_FILE);

        // Drop the shared lock first: per the original authors, GFS refuses the
        // conditional promotion to "exclusive" while the shared lock is held,
        // even when we are its only holder.
        $readLock->unlock();

        // The unlock above may not take effect immediately on GFS, so the
        // non-blocking exclusive request is attempted up to ten times. The same
        // retry also happens when another local process holds the shared lock.
        $attemptsLeft = 10;
        while ($attemptsLeft-- > 0) {
            if ($readLock->lock(LOCK_EX, true)) {
                // Exclusive lock is obtained
                self::_stopReadLockProcessing($lockDirectory);
                return true;
            }

            // wait 1 microsecond before the next attempt
            usleep(1);
        }

        // Escalation failed: go back to the shared lock we started with
        $readLock->lock(LOCK_SH);
        self::_stopReadLockProcessing($lockDirectory);

        return false;
    }

    /**
     * De-escalate the read lock to shared level.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     */
    public static function deEscalateReadLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $lockDirectory->getFileObject(self::READ_LOCK_FILE)->lock(LOCK_SH);
    }

    /**
     * Obtain the exclusive optimization lock on the index.
     *
     * The request is non-blocking: the lock object is returned on success and
     * false otherwise.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     * @return mixed
     */
    public static function obtainOptimizationLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $optimizationLock = $lockDirectory->createFile(self::OPTIMIZATION_LOCK_FILE);

        return $optimizationLock->lock(LOCK_EX, true) ? $optimizationLock : false;
    }

    /**
     * Release the exclusive optimization lock.
     *
     * @param Zend_Search_Lucene_Storage_Directory $lockDirectory
     */
    public static function releaseOptimizationLock(Zend_Search_Lucene_Storage_Directory $lockDirectory)
    {
        $lockDirectory->getFileObject(self::OPTIMIZATION_LOCK_FILE)->unlock();
    }
}
|
973
thirdparty/Zend/Search/Lucene/MultiSearcher.php
vendored
Normal file
973
thirdparty/Zend/Search/Lucene/MultiSearcher.php
vendored
Normal file
@ -0,0 +1,973 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: MultiSearcher.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Interface */
|
||||
require_once 'Zend/Search/Lucene/Interface.php';
|
||||
|
||||
/**
|
||||
* Multisearcher allows searching through several independent indexes.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
|
||||
{
|
||||
/**
|
||||
* List of indices for searching.
|
||||
* Array of Zend_Search_Lucene_Interface objects
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $_indices;
|
||||
|
||||
/**
 * Creates a multi-searcher over the given list of sub-indices.
 *
 * The list is stored first and then checked element by element.
 *
 * @param array $indices  Index objects to search through
 * @throws Zend_Search_Lucene_Exception  If an element does not implement Zend_Search_Lucene_Interface
 */
public function __construct($indices = array())
{
    $this->_indices = $indices;

    foreach ($this->_indices as $candidate) {
        if ($candidate instanceof Zend_Search_Lucene_Interface) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
    }
}
|
||||
|
||||
/**
 * Appends one more index to the list of indices being searched.
 *
 * @param Zend_Search_Lucene_Interface $index  Index to append
 */
public function addIndex(Zend_Search_Lucene_Interface $index)
{
    // Appended at the end, so existing positions in the list are unchanged
    $this->_indices[] = $index;
}
|
||||
|
||||
|
||||
/**
 * Get current generation number.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer  Never returned by this implementation
 * @throws Zend_Search_Lucene_Exception  Always
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $reason = "Generation number can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($reason);
}
|
||||
|
||||
/**
 * Get segments file name.
 *
 * Forwards the request to Zend_Search_Lucene::getSegmentFileName().
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    $segmentFileName = Zend_Search_Lucene::getSegmentFileName($generation);

    return $segmentFileName;
}
|
||||
|
||||
/**
 * Get index format version.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return integer  Never returned by this implementation
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function getFormatVersion()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $reason = "Format version can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($reason);
}
|
||||
|
||||
/**
 * Set index format version on every sub-index.
 *
 * Index is converted to this format at the nearest update time.
 *
 * @param int $formatVersion
 */
public function setFormatVersion($formatVersion)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setFormatVersion($formatVersion);
    }
}
|
||||
|
||||
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return Zend_Search_Lucene_Storage_Directory  Never returned by this implementation
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function getDirectory()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $reason = "Index directory can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($reason);
}
|
||||
|
||||
/**
 * Returns the total number of documents in all sub-indices (including deleted documents).
 *
 * @return integer  Sum of count() over every sub-index; 0 when there are none
 */
public function count()
{
    $count = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself; $this->_indices is the plain list of
        // indices and has no count() method to call.
        $count += $index->count();
    }

    return $count;
}
|
||||
|
||||
/**
 * Returns one greater than the largest possible document number.
 *
 * This may be used to, e.g., determine how big to allocate a structure which
 * will have an element for every document number in an index. The value is
 * whatever count() reports.
 *
 * @return integer
 */
public function maxDoc()
{
    $documentTotal = $this->count();

    return $documentTotal;
}
|
||||
|
||||
/**
 * Returns the total number of non-deleted documents in all sub-indices.
 *
 * @return integer  Sum of numDocs() over every sub-index; 0 when there are none
 */
public function numDocs()
{
    $docs = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself; $this->_indices is the plain list of
        // indices and has no numDocs() method to call.
        $docs += $index->numDocs();
    }

    return $docs;
}
|
||||
|
||||
/**
 * Checks whether the document is deleted.
 *
 * The id is resolved by walking the sub-indices in order: each index whose
 * count() does not exceed the remaining id is skipped and its count is
 * subtracted, and the first index that can hold the id answers the question.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->isDeleted($localId);
        }

        $localId -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
/**
 * Set default search field.
 *
 * Null means that search is performed through all fields by default.
 *
 * Default value is null.
 *
 * The call is forwarded to the static Zend_Search_Lucene::setDefaultSearchField().
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    // This method is static, so there is no $this and the per-instance index
    // list cannot be reached from here; looping over $this->_indices could
    // only fail. The static API of Zend_Search_Lucene (already relied upon by
    // getSegmentFileName() in this class) is used instead.
    Zend_Search_Lucene::setDefaultSearchField($fieldName);
}
|
||||
|
||||
|
||||
/**
 * Get default search field.
 *
 * Null means that search is performed through all fields by default.
 *
 * The value is read from the static Zend_Search_Lucene::getDefaultSearchField().
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    // This method is static, so there is no $this and the per-instance index
    // list cannot be inspected from here; the former per-index consistency
    // check could only fail. The static API of Zend_Search_Lucene (already
    // relied upon by getSegmentFileName() in this class) is used instead.
    return Zend_Search_Lucene::getDefaultSearchField();
}
|
||||
|
||||
/**
 * Set result set limit.
 *
 * 0 (default) means no limit.
 *
 * The call is forwarded to the static Zend_Search_Lucene::setResultSetLimit().
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    // This method is static, so there is no $this and the per-instance index
    // list cannot be reached from here; looping over $this->_indices could
    // only fail. The static API of Zend_Search_Lucene (already relied upon by
    // getSegmentFileName() in this class) is used instead.
    Zend_Search_Lucene::setResultSetLimit($limit);
}
|
||||
|
||||
/**
 * Get result set limit.
 *
 * 0 means no limit.
 *
 * The value is read from the static Zend_Search_Lucene::getResultSetLimit().
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    // This method is static, so there is no $this and the per-instance index
    // list cannot be inspected from here; the former per-index consistency
    // check could only fail. The static API of Zend_Search_Lucene (already
    // relied upon by getSegmentFileName() in this class) is used instead.
    return Zend_Search_Lucene::getResultSetLimit();
}
|
||||
|
||||
/**
 * Retrieve index maxBufferedDocs option.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment.
 *
 * Default value is 10.
 *
 * The value of the first sub-index is returned, provided every other
 * sub-index reports an identical (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or their values differ
 */
public function getMaxBufferedDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared here
            throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs option values.');
        }
    }

    return $maxBufferedDocs;
}
|
||||
|
||||
/**
 * Set index maxBufferedDocs option on every sub-index.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment.
 *
 * Default value is 10.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxBufferedDocs($maxBufferedDocs);
    }
}
|
||||
|
||||
/**
 * Retrieve index maxMergeDocs option.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * The value of the first sub-index is returned, provided every other
 * sub-index reports an identical (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or their values differ
 */
public function getMaxMergeDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared here
            throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs option values.');
        }
    }

    return $maxMergeDocs;
}
|
||||
|
||||
/**
 * Set index maxMergeDocs option on every sub-index.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxMergeDocs($maxMergeDocs);
    }
}
|
||||
|
||||
/**
 * Retrieve index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * The value of the first sub-index is returned, provided every other
 * sub-index reports an identical (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or their values differ
 */
public function getMergeFactor()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $mergeFactor = reset($this->_indices)->getMergeFactor();

    foreach ($this->_indices as $index) {
        if ($index->getMergeFactor() !== $mergeFactor) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared here
            throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor option values.');
        }
    }

    return $mergeFactor;
}
|
||||
|
||||
/**
 * Set index mergeFactor option on every sub-index.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    foreach ($this->_indices as $index) {
        // Must set mergeFactor itself; the previous code passed the value to
        // setMaxMergeDocs(), overwriting a different option.
        $index->setMergeFactor($mergeFactor);
    }
}
|
||||
|
||||
/**
 * Runs the query against every attached index and returns the combined
 * list of Zend_Search_Lucene_Search_QueryHit objects.
 *
 * Hit ids coming from the second and later indices are shifted by the
 * total count() of the indices before them, so that an id identifies a
 * document across the whole set of indices.
 *
 * @param mixed $query  a string or Zend_Search_Lucene_Search_Query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    if (count($this->_indices) == 0) {
        return array();
    }

    $perIndexHits = array();
    $offset       = 0;

    foreach ($this->_indices as $index) {
        $found = $index->find($query);

        // Hits of the first index already carry their final ids.
        if ($offset != 0) {
            foreach ($found as $hit) {
                $hit->id += $offset;
            }
        }

        $perIndexHits[] = $found;
        $offset        += $index->count();
    }

    /** @todo Implement advanced sorting */

    return call_user_func_array('array_merge', $perIndexHits);
}
|
||||
|
||||
/**
 * Returns a list of all unique field names that exist in the attached indices.
 *
 * @param boolean $indexed  passed through unchanged to each index
 * @return array
 */
public function getFieldNames($indexed = false)
{
    // With no indices there is nothing to merge; array_merge() must not be
    // invoked with an empty argument list (find() guards the same way).
    if (count($this->_indices) == 0) {
        return array();
    }

    $fieldNamesList = array();

    foreach ($this->_indices as $index) {
        $fieldNamesList[] = $index->getFieldNames($indexed);
    }

    return array_unique(call_user_func_array('array_merge', $fieldNamesList));
}
|
||||
|
||||
/**
 * Returns the Zend_Search_Lucene_Document stored under the multi-index
 * document number $id.
 *
 * The id is resolved by walking the indices in order and subtracting each
 * index's count() until the id falls inside one of them.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    // A query hit carries the document number in its "id" property.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $size = $index->count();

        if ($id < $size) {
            return $index->getDocument($id);
        }

        // Not in this index: make the id relative to the next one.
        $id -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
/**
 * Tells whether at least one attached index contains documents with the
 * specified term.
 *
 * Is used for query optimization.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $found = false;

    foreach ($this->_indices as $index) {
        if ($index->hasTerm($term)) {
            // One match is enough; the remaining indices are not asked.
            $found = true;
            break;
        }
    }

    return $found;
}
|
||||
|
||||
/**
 * Returns IDs of all the documents containing term.
 *
 * Ids reported by the second and later indices are shifted by the total
 * count() of the indices before them.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are rejected
 * @return array
 * @throws Zend_Search_Lucene_Exception
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    // With no indices there is nothing to merge; array_merge() must not be
    // invoked with an empty argument list (find() guards the same way).
    if (count($this->_indices) == 0) {
        return array();
    }

    $docsList = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        $docs = $index->termDocs($term);

        if ($indexShift != 0) {
            foreach ($docs as $id => $docId) {
                $docs[$id] += $indexShift;
            }
        }

        $indexShift += $index->count();
        $docsList[] = $docs;
    }

    // The document ids are the array values, so renumbering of keys by
    // array_merge() is harmless here.
    return call_user_func_array('array_merge', $docsList);
}
|
||||
|
||||
/**
 * Would return a documents filter for all documents containing term.
 *
 * The multi-searcher does not implement this operation: the method
 * unconditionally throws, whatever arguments are given.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return Zend_Search_Lucene_Index_DocsFilter
 * @throws Zend_Search_Lucene_Exception  always
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
}
|
||||
|
||||
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * Document ids reported by the second and later indices are shifted by
 * the total count() of the indices before them.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are rejected
 * @return array
 * @throws Zend_Search_Lucene_Exception
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    // Build the result directly instead of using array_merge(): array_merge()
    // renumbers integer keys from zero, which would discard the document ids
    // this method is supposed to return as keys.
    $freqs = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        // NOTE(review): assumes every index reports ids below its count(),
        // the same assumption getDocument() uses to resolve ids, so shifted
        // ids of different indices cannot collide.
        foreach ($index->termFreqs($term) as $docId => $freq) {
            $freqs[$docId + $indexShift] = $freq;
        }

        $indexShift += $index->count();
    }

    return $freqs;
}
|
||||
|
||||
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Document ids reported by the second and later indices are shifted by
 * the total count() of the indices before them.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are rejected
 * @return array
 * @throws Zend_Search_Lucene_Exception
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    // The previous implementation had three defects:
    //  - shifted entries were written back into the array being iterated
    //    and then replaced by an always-empty array;
    //  - the final merge was applied to the last index's positions instead
    //    of the collected list;
    //  - array_merge() renumbers integer keys, losing the document ids.
    // Collecting into one docId-keyed array avoids all of them.
    $positionsByDoc = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        // NOTE(review): assumes every index reports ids below its count(),
        // the same assumption getDocument() uses to resolve ids, so shifted
        // ids of different indices cannot collide.
        foreach ($index->termPositions($term) as $docId => $positions) {
            $positionsByDoc[$docId + $indexShift] = $positions;
        }

        $indexShift += $index->count();
    }

    return $positionsByDoc;
}
|
||||
|
||||
/**
 * Returns the number of documents containing the $term, summed over all
 * attached indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_indices as $index) {
        $total += $index->docFreq($term);
    }

    return $total;
}
|
||||
|
||||
/**
 * Retrieve the similarity shared by all attached indices.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 * @throws Zend_Search_Lucene_Exception  if no index is attached or the
 *                                       indices do not return the identical similarity
 */
public function getSimilarity()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    // The first index provides the value all others are compared against.
    $reference = reset($this->_indices)->getSimilarity();

    foreach ($this->_indices as $index) {
        if ($index->getSimilarity() === $reference) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
    }

    return $reference;
}
|
||||
|
||||
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * The multi-index id is resolved the same way getDocument() resolves it;
 * null is returned when the id lies beyond the last index.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null
 */
public function norm($id, $fieldName)
{
    foreach ($this->_indices as $index) {
        $size = $index->count();

        if ($id < $size) {
            return $index->norm($id, $fieldName);
        }

        // Not in this index: make the id relative to the next one.
        $id -= $size;
    }

    return null;
}
|
||||
|
||||
/**
 * Returns true if any documents have been deleted from at least one of
 * the attached indices.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_indices as $index) {
        if ($index->hasDeletions()) {
            // One positive answer settles it; later indices are not asked.
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
|
||||
|
||||
/**
 * Deletes a document from the index that owns it.
 * $id is an internal (multi-index) document id.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception  if $id is out of the range
 */
public function delete($id)
{
    // The signature documents query hits as valid input, but the id
    // arithmetic below needs a plain number. Unwrap the hit the same way
    // getDocument() does.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $indexCount = $index->count();

        if ($indexCount > $id) {
            $index->delete($id);
            return;
        }

        // Not in this index: make the id relative to the next one.
        $id -= $indexCount;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
|
||||
/**
|
||||
* Callback used to choose target index for new documents
|
||||
*
|
||||
* Function/method signature:
|
||||
* Zend_Search_Lucene_Interface callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
|
||||
*
|
||||
* null means "default documents distributing algorithm"
|
||||
*
|
||||
* @var callback
|
||||
*/
|
||||
protected $_documentDistributorCallBack = null;
|
||||
|
||||
/**
 * Set callback for choosing target index.
 *
 * null is accepted and stored as is; any other value has to pass
 * is_callable().
 *
 * @param callback $callback
 * @throws Zend_Search_Lucene_Exception  if $callback is neither null nor callable
 */
public function setDocumentDistributorCallback($callback)
{
    $acceptable = ($callback === null) || is_callable($callback);

    if (!$acceptable) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$callback parameter must be a valid callback.');
    }

    $this->_documentDistributorCallBack = $callback;
}
|
||||
|
||||
/**
 * Get callback for choosing target index.
 *
 * @return callback  the stored callback, or null when none has been set
 */
public function getDocumentDistributorCallback()
{
    $callback = $this->_documentDistributorCallBack;

    return $callback;
}
|
||||
|
||||
/**
 * Adds a document to one of the attached indices.
 *
 * The target index is whatever the document distributor callback returns;
 * when no callback is set, an index is picked with array_rand().
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $distributor = $this->_documentDistributorCallBack;

    if ($distributor === null) {
        $target = $this->_indices[array_rand($this->_indices)];
    } else {
        $target = call_user_func($distributor, $document, $this->_indices);
    }

    $target->addDocument($document);
}
|
||||
|
||||
/**
 * Commit changes resulting from delete() or undeleteAll() operations
 * by committing each attached index in turn.
 */
public function commit()
{
    foreach ($this->_indices as $attachedIndex) {
        $attachedIndex->commit();
    }
}
|
||||
|
||||
/**
 * Optimize every attached index.
 *
 * Merges all segments into one
 */
public function optimize()
{
    foreach ($this->_indices as $index) {
        // The method is spelled optimize(), as this class itself declares
        // it; the previous call to optimise() targeted a method name that
        // is not used anywhere else in this code.
        $index->optimize();
    }
}
|
||||
|
||||
/**
 * Returns an array of all terms in the attached indices.
 *
 * @return array
 */
public function terms()
{
    // With no indices there is nothing to merge; array_merge() must not be
    // invoked with an empty argument list (find() guards the same way).
    if (count($this->_indices) == 0) {
        return array();
    }

    $termsList = array();

    foreach ($this->_indices as $index) {
        $termsList[] = $index->terms();
    }

    return array_unique(call_user_func_array('array_merge', $termsList));
}
|
||||
|
||||
|
||||
/**
|
||||
* Terms stream priority queue object
|
||||
*
|
||||
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
|
||||
*/
|
||||
private $_termsStream = null;
|
||||
|
||||
/**
 * Reset terms stream.
 *
 * An existing stream queue is reset in place; otherwise a new
 * Zend_Search_Lucene_TermStreamsPriorityQueue is created over the
 * attached indices.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    /** Zend_Search_Lucene_TermStreamsPriorityQueue */
    require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
}
|
||||
|
||||
/**
 * Skip terms stream up to specified term prefix.
 *
 * Prefix contains fully specified field info and portion of searched term.
 * The call is forwarded to the terms stream queue, which is created by
 * resetTermsStream().
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $stream = $this->_termsStream;

    $stream->skipTo($prefix);
}
|
||||
|
||||
/**
 * Scans terms dictionary and returns next term.
 *
 * The call is forwarded to the terms stream queue, which is created by
 * resetTermsStream().
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $next = $this->_termsStream->nextTerm();

    return $next;
}
|
||||
|
||||
/**
 * Returns term in current position.
 *
 * The call is forwarded to the terms stream queue, which is created by
 * resetTermsStream().
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $current = $this->_termsStream->currentTerm();

    return $current;
}
|
||||
|
||||
/**
 * Close terms stream.
 *
 * Should be used for resources clean up if stream is not read up to the end.
 * The stream queue is closed and then dropped, so the next
 * resetTermsStream() call builds a new one.
 */
public function closeTermsStream()
{
    $stream = $this->_termsStream;
    $stream->closeTermsStream();

    $this->_termsStream = null;
}
|
||||
|
||||
|
||||
/**
 * Undeletes all documents currently marked as deleted, in every
 * attached index.
 */
public function undeleteAll()
{
    foreach ($this->_indices as $attachedIndex) {
        $attachedIndex->undeleteAll();
    }
}
|
||||
|
||||
|
||||
/**
 * Add reference to the index object.
 *
 * Intentionally a no-op for this class.
 *
 * @internal
 */
public function addReference()
{
    // Do nothing, since it's never referenced by indices
}
|
||||
|
||||
/**
 * Remove reference from the index object.
 *
 * The interface contract is that an index is closed and cleaned up when
 * its reference count becomes zero; this class keeps no count, so the
 * method is intentionally a no-op.
 *
 * @internal
 */
public function removeReference()
{
    // Do nothing, since it's never referenced by indices
}
|
||||
}
|
171
thirdparty/Zend/Search/Lucene/PriorityQueue.php
vendored
Normal file
171
thirdparty/Zend/Search/Lucene/PriorityQueue.php
vendored
Normal file
@ -0,0 +1,171 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: PriorityQueue.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Abstract Priority Queue
|
||||
*
|
||||
* It implements a priority queue.
|
||||
* Please go to "Data Structures and Algorithms",
|
||||
* Aho, Hopcroft, and Ullman, Addison-Wesley, 1983 (corrected 1987 edition),
|
||||
* for implementation details.
|
||||
*
|
||||
* It provides O(log(N)) time of put/pop operations, where N is a size of queue
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_PriorityQueue
{
    /**
     * Queue heap
     *
     * Binary heap stored in a plain array, least element first:
     *   [0]       - top of the tree
     *   [2*n + 1] - first child of [n]
     *   [2*n + 2] - second child of [n]
     *
     * @var array
     */
    private $_heap = array();


    /**
     * Add element to the queue
     *
     * O(log(N)) time
     *
     * @param mixed $element
     */
    public function put($element)
    {
        // Start with a free slot at the end and let it bubble up while the
        // new element is less than the slot's parent.
        $slot = count($this->_heap);

        while ($slot != 0) {
            $parent = ($slot - 1) >> 1; // floor( ($slot-1)/2 )

            if (!$this->_less($element, $this->_heap[$parent])) {
                break;
            }

            // Parent moves down into the free slot, the slot moves up.
            $this->_heap[$slot] = $this->_heap[$parent];
            $slot = $parent;
        }

        $this->_heap[$slot] = $element;
    }


    /**
     * Return least element of the queue
     *
     * Constant time
     *
     * @return mixed  null if the queue is empty
     */
    public function top()
    {
        return (count($this->_heap) == 0) ? null : $this->_heap[0];
    }


    /**
     * Removes and return least element of the queue
     *
     * O(log(N)) time
     *
     * @return mixed  null if the queue is empty
     */
    public function pop()
    {
        $size = count($this->_heap);
        if ($size == 0) {
            return null;
        }

        $least = $this->_heap[0];
        $tail  = $size - 1;

        // The last element is re-inserted from the top: the hole left by the
        // removed element sinks while a child is less than the last element.
        // Only positions below $tail count as children.
        $hole = 0;
        while (true) {
            $child = ($hole << 1) + 1; // First child

            // Choose smaller child
            if (($child + 1) < $tail
                && $this->_less($this->_heap[$child + 1], $this->_heap[$child])
            ) {
                $child++;
            }

            if ($child >= $tail
                || !$this->_less($this->_heap[$child], $this->_heap[$tail])
            ) {
                break;
            }

            // Child moves up into the hole, the hole moves down.
            $this->_heap[$hole] = $this->_heap[$child];
            $hole = $child;
        }

        // Move last element to the new position
        $this->_heap[$hole] = $this->_heap[$tail];
        unset($this->_heap[$tail]);

        return $least;
    }


    /**
     * Clear queue
     */
    public function clear()
    {
        $this->_heap = array();
    }


    /**
     * Compare elements
     *
     * Returns true, if $el1 is less than $el2; false otherwise
     *
     * @param mixed $el1
     * @param mixed $el2
     * @return boolean
     */
    abstract protected function _less($el1, $el2);
}
|
||||
|
612
thirdparty/Zend/Search/Lucene/Proxy.php
vendored
Normal file
612
thirdparty/Zend/Search/Lucene/Proxy.php
vendored
Normal file
@ -0,0 +1,612 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Proxy.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Interface */
|
||||
require_once 'Zend/Search/Lucene/Interface.php';
|
||||
|
||||
|
||||
/**
|
||||
* Proxy class intended to be used in userland.
|
||||
*
|
||||
* It tracks when the index object goes out of scope and forces index closing
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Proxy implements Zend_Search_Lucene_Interface
|
||||
{
|
||||
/**
|
||||
* Index object
|
||||
*
|
||||
* @var Zend_Search_Lucene_Interface
|
||||
*/
|
||||
private $_index;
|
||||
|
||||
/**
 * Object constructor
 *
 * Stores the wrapped index and registers one reference on it.
 *
 * @param Zend_Search_Lucene_Interface $index
 */
public function __construct(Zend_Search_Lucene_Interface $index)
{
    $this->_index = $index;

    // Balanced by removeReference() in the destructor.
    $this->_index->addReference();
}
|
||||
|
||||
/**
 * Object destructor
 *
 * Releases the reference taken in the constructor and forgets the index.
 */
public function __destruct()
{
    // Nothing to release when no index is held.
    if ($this->_index !== null) {
        $this->_index->removeReference();
    }

    $this->_index = null;
}
|
||||
|
||||
/**
 * Get current generation number
 *
 * Returns generation number
 * 0 means pre-2.1 index format
 * -1 means there are no segments files.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer
 * @throws Zend_Search_Lucene_Exception
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    // The result has to be handed back to the caller; previously the call
    // was made but its value dropped, so this method always returned null.
    return Zend_Search_Lucene::getActualGeneration($directory);
}
|
||||
|
||||
/**
 * Get segments file name
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    // The result has to be handed back to the caller; previously the call
    // was made but its value dropped, so this method always returned null.
    return Zend_Search_Lucene::getSegmentFileName($generation);
}
|
||||
|
||||
/**
 * Get index format version (delegates to the wrapped index).
 *
 * @return integer
 */
public function getFormatVersion()
{
    $version = $this->_index->getFormatVersion();

    return $version;
}
|
||||
|
||||
/**
 * Set index format version (delegates to the wrapped index).
 * Index is converted to this format at the nearest update time
 *
 * @param int $formatVersion
 * @throws Zend_Search_Lucene_Exception
 */
public function setFormatVersion($formatVersion)
{
    $wrapped = $this->_index;

    $wrapped->setFormatVersion($formatVersion);
}
|
||||
|
||||
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance of the
 * wrapped index.
 *
 * @return Zend_Search_Lucene_Storage_Directory
 */
public function getDirectory()
{
    $directory = $this->_index->getDirectory();

    return $directory;
}
|
||||
|
||||
/**
 * Returns the total number of documents in the wrapped index
 * (including deleted documents).
 *
 * @return integer
 */
public function count()
{
    $total = $this->_index->count();

    return $total;
}
|
||||
|
||||
/**
 * Returns one greater than the largest possible document number, as
 * reported by the wrapped index.
 * This may be used to, e.g., determine how big to allocate a structure which will have
 * an element for every document number in an index.
 *
 * @return integer
 */
public function maxDoc()
{
    $limit = $this->_index->maxDoc();

    return $limit;
}
|
||||
|
||||
/**
 * Returns the total number of non-deleted documents in the wrapped index.
 *
 * @return integer
 */
public function numDocs()
{
    $alive = $this->_index->numDocs();

    return $alive;
}
|
||||
|
||||
/**
 * Checks whether the document is deleted (delegates to the wrapped index).
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $deleted = $this->_index->isDeleted($id);

    return $deleted;
}
|
||||
|
||||
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Default value is null
 *
 * Forwards to the static setter of Zend_Search_Lucene; the wrapped index
 * instance is not involved.
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    Zend_Search_Lucene::setDefaultSearchField($fieldName);
}
|
||||
|
||||
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Forwards to the static getter of Zend_Search_Lucene.
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    $fieldName = Zend_Search_Lucene::getDefaultSearchField();

    return $fieldName;
}
|
||||
|
||||
/**
 * Set result set limit.
 *
 * 0 (default) means no limit
 *
 * Forwards to the static setter of Zend_Search_Lucene.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    Zend_Search_Lucene::setResultSetLimit($limit);
}
|
||||
|
||||
/**
 * Get result set limit.
 *
 * 0 means no limit
 *
 * Forwards to the static getter of Zend_Search_Lucene.
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    $limit = Zend_Search_Lucene::getResultSetLimit();

    return $limit;
}
|
||||
|
||||
/**
 * Retrieve index maxBufferedDocs option of the wrapped index.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMaxBufferedDocs()
{
    $maxBufferedDocs = $this->_index->getMaxBufferedDocs();

    return $maxBufferedDocs;
}
|
||||
|
||||
/**
 * Set index maxBufferedDocs option on the wrapped index.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    $wrapped = $this->_index;

    $wrapped->setMaxBufferedDocs($maxBufferedDocs);
}
|
||||
|
||||
|
||||
/**
 * Retrieve index maxMergeDocs option of the wrapped index.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @return integer
 */
public function getMaxMergeDocs()
{
    $maxMergeDocs = $this->_index->getMaxMergeDocs();

    return $maxMergeDocs;
}
|
||||
|
||||
/**
 * Set index maxMergeDocs option on the wrapped index.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    $wrapped = $this->_index;

    $wrapped->setMaxMergeDocs($maxMergeDocs);
}
|
||||
|
||||
|
||||
/**
 * Retrieve index mergeFactor option of the wrapped index.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @return integer
 */
public function getMergeFactor()
{
    $mergeFactor = $this->_index->getMergeFactor();

    return $mergeFactor;
}
|
||||
|
||||
/**
 * Set index mergeFactor option on the wrapped index.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    $wrapped = $this->_index;

    $wrapped->setMergeFactor($mergeFactor);
}
|
||||
|
||||
/**
 * Performs a query against the wrapped index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Every argument actually passed to this method is forwarded, not only
 * $query.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    // Complete argument list of this call, including any extra arguments
    // beyond $query.
    $arguments = func_get_args();

    $target = array($this->_index, 'find');

    return call_user_func_array($target, $arguments);
}
|
||||
|
||||
/**
 * Returns a list of all unique field names that exist in the wrapped index.
 *
 * @param boolean $indexed
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $fieldNames = $this->_index->getFieldNames($indexed);

    return $fieldNames;
}
|
||||
|
||||
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in the wrapped index.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 */
public function getDocument($id)
{
    $document = $this->_index->getDocument($id);

    return $document;
}
|
||||
|
||||
/**
 * Returns true if the wrapped index contains documents with specified term.
 *
 * Is used for query optimization.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $present = $this->_index->hasTerm($term);

    return $present;
}
|
||||
|
||||
/**
 * Returns IDs of all the documents containing term (delegates to the
 * wrapped index, filter included).
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $docIds = $this->_index->termDocs($term, $docsFilter);

    return $docIds;
}
|
||||
|
||||
/**
 * Returns a documents filter covering all documents containing the term.
 *
 * Same operation as termDocs(), but the result is delivered as a
 * Zend_Search_Lucene_Index_DocsFilter object. Delegated to the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return Zend_Search_Lucene_Index_DocsFilter
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $index = $this->_index;

    return $index->termDocsFilter($term, $docsFilter);
}
|
||||
|
||||
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * The call is forwarded unchanged to the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    return $this->_index->termFreqs($term, $docsFilter);
}
|
||||
|
||||
/**
 * Returns all positions of the term in the documents.
 * Result structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Delegated to the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $index = $this->_index;

    return $index->termPositions($term, $docsFilter);
}
|
||||
|
||||
/**
 * Returns how many documents in this index contain the $term,
 * as counted by the wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $index = $this->_index;

    return $index->docFreq($term);
}
|
||||
|
||||
/**
 * Retrieves the similarity used by the index reader (taken from the
 * wrapped index).
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    $index = $this->_index;

    return $index->getSimilarity();
}
|
||||
|
||||
/**
 * Returns the normalization factor for a "field, document" pair,
 * obtained from the wrapped index.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float
 */
public function norm($id, $fieldName)
{
    $index = $this->_index;

    return $index->norm($id, $fieldName);
}
|
||||
|
||||
/**
 * Tells whether any documents have been deleted from this index.
 *
 * The answer comes from the wrapped index.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $index = $this->_index;

    return $index->hasDeletions();
}
|
||||
|
||||
/**
 * Deletes a document from the index.
 * $id is an internal document id.
 *
 * Whatever the wrapped index's delete() returns is passed back to the caller.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception
 */
public function delete($id)
{
    $index = $this->_index;

    return $index->delete($id);
}
|
||||
|
||||
/**
 * Adds a document to this index by handing it to the wrapped index.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $index = $this->_index;
    $index->addDocument($document);
}
|
||||
|
||||
/**
 * Commits changes resulting from delete() or undeleteAll() operations.
 *
 * The commit itself is performed by the wrapped index.
 */
public function commit()
{
    $index = $this->_index;
    $index->commit();
}
|
||||
|
||||
/**
 * Optimizes the index.
 *
 * Merges all segments into one; the work is delegated to the wrapped index.
 */
public function optimize()
{
    $index = $this->_index;
    $index->optimize();
}
|
||||
|
||||
/**
 * Returns an array of all terms in this index, as supplied by the
 * wrapped index.
 *
 * @return array
 */
public function terms()
{
    $index = $this->_index;

    return $index->terms();
}
|
||||
|
||||
|
||||
/**
 * Resets the terms stream of the wrapped index.
 */
public function resetTermsStream()
{
    $index = $this->_index;
    $index->resetTermsStream();
}
|
||||
|
||||
/**
 * Skips the terms stream up to the specified term prefix.
 *
 * The prefix contains fully specified field info and a portion of the
 * searched term. The result of the wrapped index's skipTo() call is
 * passed back to the caller.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $index = $this->_index;

    return $index->skipTo($prefix);
}
|
||||
|
||||
/**
 * Scans the terms dictionary and returns the next term, as produced by
 * the wrapped index.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $index = $this->_index;

    return $index->nextTerm();
}
|
||||
|
||||
/**
 * Returns the term at the current position of the wrapped index's
 * terms stream.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $index = $this->_index;

    return $index->currentTerm();
}
|
||||
|
||||
/**
 * Closes the terms stream.
 *
 * Should be used for resource clean-up if the stream is not read up to
 * the end. Delegated to the wrapped index.
 */
public function closeTermsStream()
{
    $index = $this->_index;
    $index->closeTermsStream();
}
|
||||
|
||||
|
||||
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * The result of the wrapped index's undeleteAll() call is passed back
 * to the caller.
 */
public function undeleteAll()
{
    $index = $this->_index;

    return $index->undeleteAll();
}
|
||||
|
||||
/**
 * Adds a reference to the index object (forwarded to the wrapped index).
 *
 * @internal
 */
public function addReference()
{
    $index = $this->_index;

    return $index->addReference();
}
|
||||
|
||||
/**
 * Removes a reference from the index object (forwarded to the wrapped index).
 *
 * When the reference count becomes zero, the index is closed and
 * resources are cleaned up.
 *
 * @internal
 */
public function removeReference()
{
    $index = $this->_index;

    return $index->removeReference();
}
|
||||
}
|
278
thirdparty/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php
vendored
Normal file
278
thirdparty/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php
vendored
Normal file
@ -0,0 +1,278 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: BooleanExpressionRecognizer.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
{
    /** State Machine states */
    const ST_START        = 0;
    const ST_LITERAL      = 1;
    const ST_NOT_OPERATOR = 2;
    const ST_AND_OPERATOR = 3;
    const ST_OR_OPERATOR  = 4;

    /** Input symbols */
    const IN_LITERAL      = 0;
    const IN_NOT_OPERATOR = 1;
    const IN_AND_OPERATOR = 2;
    const IN_OR_OPERATOR  = 3;


    /**
     * NOT operator signal
     *
     * Set by notOperatorAction() and cleared again by literalAction()
     * once the next literal has been stored.
     *
     * @var boolean
     */
    private $_negativeLiteral = false;

    /**
     * Current literal (the value most recently passed to processLiteral())
     *
     * @var mixed
     */
    private $_literal;


    /**
     * Set of boolean query conjunctions
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     * array(<literal>, <sign>)
     *
     * <sign> is TRUE for a plain literal and FALSE for a literal that was
     * preceded by the NOT operator (see literalAction()).
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * @var array
     */
    private $_conjunctions = array();

    /**
     * Current conjunction (the one literals are currently appended to)
     *
     * @var array
     */
    private $_currentConjunction = array();


    /**
     * Object constructor
     *
     * Registers the states, input symbols, transition rules and entry
     * actions of the recognizer with the parent state machine.
     */
    public function __construct()
    {
        parent::__construct( array(self::ST_START,
                                   self::ST_LITERAL,
                                   self::ST_NOT_OPERATOR,
                                   self::ST_AND_OPERATOR,
                                   self::ST_OR_OPERATOR),
                             array(self::IN_LITERAL,
                                   self::IN_NOT_OPERATOR,
                                   self::IN_AND_OPERATOR,
                                   self::IN_OR_OPERATOR));

        $emptyOperatorAction    = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
        $emptyNotOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');

        // NOTE(review): each rule presumably reads
        // (source state, input symbol, target state[, action]) - confirm against Zend_Search_Lucene_FSM.
        $this->addRules(array( array(self::ST_START,        self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_START,        self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

                               array(self::ST_LITERAL,      self::IN_AND_OPERATOR, self::ST_AND_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_OR_OPERATOR,  self::ST_OR_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_LITERAL,      self::ST_LITERAL,      $emptyOperatorAction),
                               array(self::ST_LITERAL,      self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR, $emptyNotOperatorAction),

                               array(self::ST_NOT_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),

                               array(self::ST_AND_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

                               array(self::ST_OR_OPERATOR,  self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_OR_OPERATOR,  self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
                             ));

        $notOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'notOperatorAction');
        $orOperatorAction  = new Zend_Search_Lucene_FSMAction($this, 'orOperatorAction');
        $literalAction     = new Zend_Search_Lucene_FSMAction($this, 'literalAction');

        // No entry action is registered for ST_AND_OPERATOR.
        $this->addEntryAction(self::ST_NOT_OPERATOR, $notOperatorAction);
        $this->addEntryAction(self::ST_OR_OPERATOR,  $orOperatorAction);
        $this->addEntryAction(self::ST_LITERAL,      $literalAction);
    }


    /**
     * Process next operator.
     *
     * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
     *
     * @param integer $operator
     */
    public function processOperator($operator)
    {
        $this->process($operator);
    }

    /**
     * Process expression literal.
     *
     * The literal is remembered as the current literal and the
     * IN_LITERAL input symbol is then fed to the state machine.
     *
     * @param mixed $literal
     */
    public function processLiteral($literal)
    {
        $this->_literal = $literal;

        $this->process(self::IN_LITERAL);
    }

    /**
     * Finish an expression and return result
     *
     * The current conjunction is appended to the set of conjunctions and
     * the whole set is returned.
     *
     * Result is a set of boolean query conjunctions
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     * array(<literal>, <sign>)
     *
     * <sign> is TRUE for a plain literal and FALSE for a literal that was
     * preceded by the NOT operator.
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception if the machine is not in the ST_LITERAL state
     */
    public function finishExpression()
    {
        if ($this->getState() != self::ST_LITERAL) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Literal expected.');
        }

        $this->_conjunctions[] = $this->_currentConjunction;

        return $this->_conjunctions;
    }



    /*********************************************************************
     * Actions implementation
     *********************************************************************/

    /**
     * default (omitted) operator processing
     *
     * Applies the parser's default operator, then stores the literal.
     */
    public function emptyOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process literal
        $this->literalAction();
    }

    /**
     * default (omitted) + NOT operator processing
     *
     * Applies the parser's default operator, then raises the NOT signal.
     */
    public function emptyNotOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process NOT operator
        $this->notOperatorAction();
    }


    /**
     * NOT operator processing
     *
     * Marks the next literal as negated.
     */
    public function notOperatorAction()
    {
        $this->_negativeLiteral = true;
    }

    /**
     * OR operator processing
     * Close current conjunction
     */
    public function orOperatorAction()
    {
        $this->_conjunctions[]     = $this->_currentConjunction;
        $this->_currentConjunction = array();
    }

    /**
     * Literal processing
     *
     * Stores array(<literal>, <sign>) in the current conjunction, where
     * <sign> is the inverse of the NOT signal.
     */
    public function literalAction()
    {
        // Add literal to the current conjunction
        $this->_currentConjunction[] = array($this->_literal, !$this->_negativeLiteral);

        // Switch off negative signal
        $this->_negativeLiteral = false;
    }

    /**
     * Handles an omitted operator between two operands.
     *
     * Nothing happens when the query parser's default operator is B_AND;
     * for any other default the current conjunction is closed exactly as
     * an explicit OR operator would close it.
     */
    private function _applyDefaultOperator()
    {
        /** Zend_Search_Lucene_Search_QueryParser */
        require_once 'Zend/Search/Lucene/Search/QueryParser.php';

        if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() != Zend_Search_Lucene_Search_QueryParser::B_AND) {
            $this->orOperatorAction();
        }
    }
}
|
94
thirdparty/Zend/Search/Lucene/Search/Highlighter/Default.php
vendored
Normal file
94
thirdparty/Zend/Search/Lucene/Search/Highlighter/Default.php
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Default.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** @see Zend_Search_Lucene_Search_Highlighter_Interface */
|
||||
require_once 'Zend/Search/Lucene/Search/Highlighter/Interface.php';
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Highlighter_Default implements Zend_Search_Lucene_Search_Highlighter_Interface
{
    /**
     * Palette of colors used for text highlighting
     *
     * @var array
     */
    protected $_highlightColors = array(
        '#66ffff', '#ff66ff', '#ffff66',
        '#ff8888', '#88ff88', '#8888ff',
        '#88dddd', '#dd88dd', '#dddd88',
        '#aaddff', '#aaffdd', '#ddaaff',
        '#ddffaa', '#ffaadd', '#ffddaa'
    );

    /**
     * Position in the palette of the color used by the next highlight() call
     *
     * The position advances on every highlight() call and wraps around at
     * the end of the palette, so terms matching different queries are
     * highlighted using different colors.
     *
     * @var integer
     */
    protected $_currentColorIndex = 0;

    /**
     * HTML document that receives the highlighting
     *
     * @var Zend_Search_Lucene_Document_Html
     */
    protected $_doc;

    /**
     * Stores the document that subsequent highlight() calls operate on.
     *
     * @param Zend_Search_Lucene_Document_Html $document
     */
    public function setDocument(Zend_Search_Lucene_Document_Html $document)
    {
        $this->_doc = $document;
    }

    /**
     * Returns the document previously stored with setDocument().
     *
     * @return Zend_Search_Lucene_Document_Html $document
     */
    public function getDocument()
    {
        return $this->_doc;
    }

    /**
     * Highlights the specified words in the stored document using the
     * current palette color, then moves on to the next color.
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words)
    {
        $palette  = $this->_highlightColors;
        $position = $this->_currentColorIndex;

        $color = $palette[$position];

        // Advance cyclically so the palette is reused once exhausted.
        $this->_currentColorIndex = ($position + 1) % count($palette);

        $this->_doc->highlight($words, $color);
    }
}
|
53
thirdparty/Zend/Search/Lucene/Search/Highlighter/Interface.php
vendored
Normal file
53
thirdparty/Zend/Search/Lucene/Search/Highlighter/Interface.php
vendored
Normal file
@ -0,0 +1,53 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Interface.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
interface Zend_Search_Lucene_Search_Highlighter_Interface
{
    /**
     * Supplies the HTML document that is to be highlighted.
     *
     * @param Zend_Search_Lucene_Document_Html $document
     */
    public function setDocument(Zend_Search_Lucene_Document_Html $document);

    /**
     * Returns the HTML document that is being highlighted.
     *
     * @return Zend_Search_Lucene_Document_Html $document
     */
    public function getDocument();

    /**
     * Highlights the specified words (the method is invoked once per subquery).
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words);
}
|
233
thirdparty/Zend/Search/Lucene/Search/Query.php
vendored
Normal file
233
thirdparty/Zend/Search/Lucene/Search/Query.php
vendored
Normal file
@ -0,0 +1,233 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Query.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Query
{
    /**
     * query boost factor
     *
     * @var float
     */
    private $_boost = 1;

    /**
     * Query weight
     *
     * NULL until a weight has been created; reset() sets it back to NULL.
     *
     * @var Zend_Search_Lucene_Search_Weight
     */
    protected $_weight = null;

    /**
     * Current highlight color
     *
     * NOTE(review): not referenced anywhere inside this class - confirm whether it can be dropped.
     *
     * @var integer
     */
    private $_currentColorIndex = 0;

    /**
     * Gets the boost for this clause.  Documents matching
     * this clause will (in addition to the normal weightings) have their score
     * multiplied by boost.   The boost is 1.0 by default.
     *
     * @return float
     */
    public function getBoost()
    {
        return $this->_boost;
    }

    /**
     * Sets the boost for this query clause to $boost.
     *
     * @param float $boost
     */
    public function setBoost($boost)
    {
        $this->_boost = $boost;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    abstract public function score($docId, Zend_Search_Lucene_Interface $reader);

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    abstract public function matchedDocs();

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Query specific implementation
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    abstract public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null);

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    abstract public function createWeight(Zend_Search_Lucene_Interface $reader);

    /**
     * Constructs and initializes a Weight for a _top-level_query_.
     *
     * When a weight already exists it is returned untouched. Otherwise
     * createWeight() is invoked, the resulting weight is normalized with
     * the query norm supplied by the reader's similarity, and that weight
     * is returned.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _initWeight(Zend_Search_Lucene_Interface $reader)
    {
        // Check, that it's a top-level query and query weight is not initialized yet.
        if ($this->_weight !== null) {
            return $this->_weight;
        }

        // createWeight() is relied upon to populate $this->_weight;
        // its return value is deliberately not used here.
        $this->createWeight($reader);
        $sum       = $this->_weight->sumOfSquaredWeights();
        $queryNorm = $reader->getSimilarity()->queryNorm($sum);
        $this->_weight->normalize($queryNorm);

        // Return the weight on this path too, so both paths agree.
        return $this->_weight;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function rewrite(Zend_Search_Lucene_Interface $index);

    /**
     * Optimize query in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function optimize(Zend_Search_Lucene_Interface $index);

    /**
     * Reset query, so it can be reused within other queries or
     * with other indices
     */
    public function reset()
    {
        $this->_weight = null;
    }


    /**
     * Print a query
     *
     * @return string
     */
    abstract public function __toString();

    /**
     * Return query terms
     *
     * @return array
     */
    abstract public function getQueryTerms();

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);

    /**
     * Highlight matches in $inputHTML
     *
     * @param string $inputHTML
     * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
     * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
     * @return string
     */
    public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
    {
        // Fall back to the default highlighter when the caller supplies none.
        if ($highlighter === null) {
            require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
            $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
        }

        /** Zend_Search_Lucene_Document_Html */
        require_once 'Zend/Search/Lucene/Document/Html.php';

        $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHTML();
    }

    /**
     * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
     *
     * The fragment is converted from $encoding to UTF-8 and wrapped into a
     * minimal HTML document before highlighting; only the body content of
     * that document is returned.
     *
     * @param string $inputHtmlFragment
     * @param string  $encoding   Input HTML string encoding
     * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
     * @return string
     */
    public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
    {
        // Fall back to the default highlighter when the caller supplies none.
        if ($highlighter === null) {
            require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
            $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
        }

        $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                   . iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment) . '</body></html>';

        /** Zend_Search_Lucene_Document_Html */
        require_once 'Zend/Search/Lucene/Document/Html.php';

        $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
        $highlighter->setDocument($doc);

        $this->_highlightMatches($highlighter);

        return $doc->getHtmlBody();
    }
}
|
||||
|
815
thirdparty/Zend/Search/Lucene/Search/Query/Boolean.php
vendored
Normal file
815
thirdparty/Zend/Search/Lucene/Search/Query/Boolean.php
vendored
Normal file
@ -0,0 +1,815 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Boolean.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
|
||||
{
|
||||
|
||||
/**
|
||||
* Subqueries
|
||||
* Array of Zend_Search_Lucene_Search_Query
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_subqueries = array();
|
||||
|
||||
/**
|
||||
* Subqueries signs.
|
||||
* If true then subquery is required.
|
||||
* If false then subquery is prohibited.
|
||||
* If null then subquery is neither prohibited, nor required
|
||||
*
|
||||
* If array is null then all subqueries are required
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_signs = array();
|
||||
|
||||
/**
|
||||
* Result vector.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_resVector = null;
|
||||
|
||||
/**
|
||||
* A score factor based on the fraction of all query subqueries
|
||||
* that a document contains.
|
||||
* float for conjunction queries
|
||||
* array of float for non conjunction queries
|
||||
*
|
||||
* @var mixed
|
||||
*/
|
||||
private $_coord = null;
|
||||
|
||||
|
||||
/**
 * Class constructor.  Create a new Boolean query object.
 *
 * Nothing is initialized unless $subqueries is an array. When it is, the
 * signs list is kept only if $signs is an array containing at least one
 * entry that is not strictly TRUE; otherwise the signs are stored as NULL,
 * which stands for "all subqueries are required". That differs from
 * addSubquery() behavior, but should never be used.
 *
 * @param array $subqueries    Array of Zend_Search_Lucene_Search_Query objects
 * @param array $signs    Array of signs.  Sign is boolean|null.
 * @return void
 */
public function __construct($subqueries = null, $signs = null)
{
    if (!is_array($subqueries)) {
        return;
    }

    $this->_subqueries = $subqueries;
    $this->_signs      = null;

    if (!is_array($signs)) {
        return;
    }

    // Keep the explicit signs only when not every subquery is required.
    foreach ($signs as $sign) {
        if ($sign !== true) {
            $this->_signs = $signs;
            return;
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
 *
 * The sign is specified as:
 *     TRUE  - subquery is required
 *     FALSE - subquery is prohibited
 *     NULL  - subquery is neither prohibited, nor required
 *
 * While every subquery added so far is required, the signs list stays
 * NULL; it is materialized the first time a non-required subquery arrives.
 *
 * @param  Zend_Search_Lucene_Search_Query $subquery
 * @param  boolean|null $sign
 * @return void
 */
public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
    // Still "all required": only the subquery needs recording.
    if ($sign === true && $this->_signs === null) {
        $this->_subqueries[] = $subquery;
        return;
    }

    if ($this->_signs === null) {
        // All previous subqueries were required: give each an explicit TRUE sign.
        $previousCount = count($this->_subqueries);
        $this->_signs  = ($previousCount > 0) ? array_fill(0, $previousCount, true) : array();
    }

    $this->_signs[]      = $sign;
    $this->_subqueries[] = $subquery;
}
|
||||
|
||||
/**
 * Re-write queries into primitive queries
 *
 * Builds a new Boolean query carrying this query's boost, and adds the
 * rewritten form of each subquery with its original sign (TRUE for every
 * subquery when no signs are stored).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $rewrittenSubquery = $subquery->rewrite($index);

        if ($this->_signs === null) {
            // No signs stored: every subquery is required
            $sign = true;
        } else {
            $sign = $this->_signs[$subqueryId];
        }

        $rewritten->addSubquery($rewrittenSubquery, $sign);
    }

    return $rewritten;
}
|
||||
|
||||
/**
 * Optimize query in the context of specified index
 *
 * Steps, in order:
 *  1. optimize every subquery;
 *  2. drop insignificant subqueries (result is Insignificant if nothing is
 *     left or only prohibited subqueries are left);
 *  3. drop optional/prohibited empty subqueries (result is Empty if a required
 *     subquery is empty, nothing is left, or only prohibited ones are left);
 *  4. a single remaining subquery is returned directly (cloned and re-boosted
 *     when this query's boost is not 1);
 *  5. Term and decomposable MultiTerm subqueries are flattened into term lists
 *     and regrouped into as few Term/MultiTerm clauses as possible.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $subqueries = array();
    $signs      = array();

    // Optimize all subqueries
    foreach ($this->_subqueries as $id => $subquery) {
        $subqueries[] = $subquery->optimize($index);
        $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
    }

    // Remove insignificant subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
            // Insignificant subquery has to be removed anyway
            unset($subqueries[$id]);
            unset($signs[$id]);
        }
    }
    if (count($subqueries) == 0) {
        // Boolean query doesn't have non-insignificant subqueries
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }
    // Check if all non-insignificant subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }


    // Check for empty subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
            if ($signs[$id] === true) {
                // Matching is required, but is actually empty
                require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
                return new Zend_Search_Lucene_Search_Query_Empty();
            } else {
                // Matching is optional or prohibited, but is empty
                // Remove it from subqueries and signs list
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
    }

    // Check, if reduced subqueries list is empty
    if (count($subqueries) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Check if all non-empty subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }


    // Check, if reduced subqueries list has only one entry
    if (count($subqueries) == 1) {
        // It's a query with only one required or optional clause
        // (it's already checked, that it's not a prohibited clause)

        if ($this->getBoost() == 1) {
            return reset($subqueries);
        }

        // Clone so that the subquery object itself keeps its own boost
        $optimizedQuery = clone reset($subqueries);
        $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());

        return $optimizedQuery;
    }


    // Prepare first candidate for optimized query
    // (built before decomposition, so it still holds every remaining subquery)
    $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
    $optimizedQuery->setBoost($this->getBoost());


    $terms        = array();
    $tsigns       = array();
    $boostFactors = array();

    // Try to decompose term and multi-term subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
            $terms[]        = $subquery->getTerm();
            $tsigns[]       = $signs[$id];
            $boostFactors[] = $subquery->getBoost();

            // remove subquery from a subqueries list
            unset($subqueries[$id]);
            unset($signs[$id]);
        } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
            $subTerms = $subquery->getTerms();
            $subSigns = $subquery->getSigns();

            if ($signs[$id] === true) {
                // It's a required multi-term subquery.
                // Something like '... +(+term1 -term2 term3 ...) ...'

                // Multi-term required subquery can be decomposed only if it contains
                // required terms and doesn't contain prohibited terms:
                // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                //
                // Check this
                $hasRequired   = false;
                $hasProhibited = false;
                if ($subSigns === null) {
                    // All subterms are required
                    $hasRequired = true;
                } else {
                    foreach ($subSigns as $sign) {
                        if ($sign === true) {
                            $hasRequired   = true;
                        } else if ($sign === false) {
                            $hasProhibited = true;
                            break;
                        }
                    }
                }
                // Continue if subquery has prohibited terms or doesn't have required terms
                if ($hasProhibited  ||  !$hasRequired) {
                    continue;
                }

                foreach ($subTerms as $termId => $term) {
                    $terms[]        = $term;
                    $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
                    $boostFactors[] = $subquery->getBoost();
                }

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);

            } else { // $signs[$id] === null  ||  $signs[$id] === false
                // It's an optional or prohibited multi-term subquery.
                // Something like '... (+term1 -term2 term3 ...) ...'
                // or
                // something like '... -(+term1 -term2 term3 ...) ...'

                // Multi-term optional and prohibited subqueries can be decomposed
                // only if all terms are optional.
                //
                // Check if all terms are optional.
                $onlyOptional = true;
                if ($subSigns === null) {
                    // All subterms are required
                    $onlyOptional = false;
                } else {
                    foreach ($subSigns as $sign) {
                        if ($sign !== null) {
                            $onlyOptional = false;
                            break;
                        }
                    }
                }

                // Continue if non-optional terms are presented in this multi-term subquery
                if (!$onlyOptional) {
                    continue;
                }

                foreach ($subTerms as $termId => $term) {
                    $terms[]        = $term;
                    $tsigns[]       = ($signs[$id] === null)? null  /* optional */ :
                                                              false /* prohibited */;
                    $boostFactors[] = $subquery->getBoost();
                }

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
    }


    // Check, if there are no decomposed subqueries
    if (count($terms) == 0 ) {
        // return prepared candidate
        return $optimizedQuery;
    }


    // Check, if all subqueries have been decomposed and all terms have the same boost factor
    if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
        $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());

        return $optimizedQuery;
    }


    // This boolean query can't be transformed to Term/MultiTerm query and still contains
    // several subqueries

    // Separate prohibited terms
    $prohibitedTerms = array();
    foreach ($terms as $id => $term) {
        if ($tsigns[$id] === false) {
            $prohibitedTerms[] = $term;

            unset($terms[$id]);
            unset($tsigns[$id]);
            unset($boostFactors[$id]);
        }
    }

    if (count($terms) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
        $clause->setBoost(reset($boostFactors));

        $subqueries[] = $clause;
        $signs[]      = reset($tsigns);

        // Clear terms list
        $terms = array();
    } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
        $clause->setBoost(reset($boostFactors));

        $subqueries[] = $clause;
        // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
        // Strict search: only a real TRUE sign counts as "required",
        // matching the === comparisons used for signs everywhere else.
        $signs[]      = (in_array(true, $tsigns, true))? true : null;

        // Clear terms list
        $terms = array();
    }

    if (count($prohibitedTerms) == 1) {
        // (boost factors are not significant for prohibited clauses)
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
        $signs[]      = false;

        // Clear prohibited terms list
        $prohibitedTerms = array();
    } else if (count($prohibitedTerms) > 1) {
        // prepare signs array
        $prohibitedSigns = array();
        foreach ($prohibitedTerms as $id => $term) {
            // all prohibited terms are grouped as optional into multi-term query
            $prohibitedSigns[$id] = null;
        }

        // (boost factors are not significant for prohibited clauses)
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
        // Clause sign is 'prohibited'
        $signs[]      = false;

        // Clear terms list
        $prohibitedTerms = array();
    }

    /** @todo Group terms with the same boost factors together */

    // Check, that all terms are processed
    // Replace candidate for optimized query
    // (if terms with differing boosts could not be regrouped, the first candidate is kept)
    if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());
    }

    return $optimizedQuery;
}
|
||||
|
||||
/**
 * Returns the subqueries held by this boolean query.
 *
 * @return array
 */
public function getSubqueries()
{
    $subqueries = $this->_subqueries;

    return $subqueries;
}
|
||||
|
||||
|
||||
/**
 * Return subqueries signs.
 *
 * The stored value is returned as is; it is NULL when all subqueries
 * are required.
 *
 * @return array|null
 */
public function getSigns()
{
    $signs = $this->_signs;

    return $signs;
}
|
||||
|
||||
|
||||
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * The created weight object is also remembered in $this->_weight.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';

    $weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
    $this->_weight = $weight;

    return $weight;
}
|
||||
|
||||
|
||||
/**
 * Calculate result vector for Conjunction query
 * (like '<subquery1> AND <subquery2> AND <subquery3>')
 *
 * The result is the intersection (by document id key) of the matched
 * documents of all subqueries, stored in $this->_resVector.
 */
private function _calculateConjunctionResult()
{
    $this->_resVector = null;

    if (count($this->_subqueries) == 0) {
        $this->_resVector = array();
    }

    $docSets     = array();
    $docSetSizes = array();
    $docSetIds   = array(); // second sort key, is used to prevent arrays comparison

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $matched = $subquery->matchedDocs();

        $docSets[]     = $matched;
        $docSetSizes[] = count($matched);
        $docSetIds[]   = $subqueryId;
    }

    // Smallest sets first, so the running intersection shrinks as early as possible
    array_multisort($docSetSizes, SORT_ASC, SORT_NUMERIC,
                    $docSetIds,   SORT_ASC, SORT_NUMERIC,
                    $docSets);

    foreach ($docSets as $docSet) {
        if ($this->_resVector === null) {
            $this->_resVector = $docSet;
        } else {
            // Hand-written key intersection, kept as a workaround
            // for the array_intersect_key() slowness problem.
            $intersection = array();
            foreach ($this->_resVector as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $intersection[$docId] = $value;
                }
            }
            $this->_resVector = $intersection;
        }

        if (count($this->_resVector) == 0) {
            // Empty result set, the remaining subqueries can't add anything
            break;
        }
    }

    // No ksort() is needed here: the used algorithm doesn't change elements order
}
|
||||
|
||||
|
||||
/**
 * Calculate result vector for non Conjunction query
 * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
 *
 * If there is at least one required subquery, the result is the intersection
 * of the required subqueries' matched documents; otherwise it is the union of
 * the optional subqueries' matched documents. Prohibited subqueries are not
 * consulted here. The result is sorted by document id.
 */
private function _calculateNonConjunctionResult()
{
    $requiredSets     = array();
    $requiredSetSizes = array();
    $requiredSetIds   = array(); // second sort key, is used to prevent arrays comparison

    $optionalDocs = array();

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $sign = $this->_signs[$subqueryId];

        if ($sign === false) {
            // Prohibited. matchedDocs() may include non-matching id's.
            // Calculating prohibited vector may take significant time,
            // but doesn't affect the result, so it's skipped.
            continue;
        }

        $matched = $subquery->matchedDocs();

        if ($sign === true) {
            // required
            $requiredSets[]     = $matched;
            $requiredSetSizes[] = count($matched);
            $requiredSetIds[]   = $subqueryId;
        } else {
            // neither required, nor prohibited: array union
            $optionalDocs += $matched;
        }
    }

    // Smallest required sets first
    array_multisort($requiredSetSizes, SORT_ASC, SORT_NUMERIC,
                    $requiredSetIds,   SORT_ASC, SORT_NUMERIC,
                    $requiredSets);

    $requiredDocs = null;
    foreach ($requiredSets as $docSet) {
        if ($requiredDocs === null) {
            $requiredDocs = $docSet;
        } else {
            // Hand-written key intersection, kept as a workaround
            // for the array_intersect_key() slowness problem.
            $intersection = array();
            foreach ($requiredDocs as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $intersection[$docId] = $value;
                }
            }
            $requiredDocs = $intersection;
        }

        if (count($requiredDocs) == 0) {
            // Empty result set, the remaining subqueries can't add anything
            break;
        }
    }

    if ($requiredDocs !== null) {
        $this->_resVector = $requiredDocs;
    } else {
        $this->_resVector = $optionalDocs;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
|
||||
|
||||
|
||||
/**
 * Score calculator for conjunction queries (all subqueries are required)
 *
 * Returns 0 as soon as one subquery scores 0 for the document. Otherwise
 * the subquery scores are summed and combined with the coord factor and
 * this query's boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // All subqueries match in a conjunction, so overlap == max overlap
        $subqueriesCount = count($this->_subqueries);
        $this->_coord = $reader->getSimilarity()->coord($subqueriesCount, $subqueriesCount);
    }

    $score = 0;

    foreach ($this->_subqueries as $subquery) {
        $subscore = $subquery->score($docId, $reader);

        if ($subscore == 0) {
            return 0;
        }

        // Reuse the score computed above instead of scoring the subquery a second time
        $score += $subscore * $this->_coord;
    }

    // NOTE(review): coord is applied per subquery above and again here;
    // kept unchanged to preserve existing scores - confirm whether intended.
    return $score * $this->_coord * $this->getBoost();
}
|
||||
|
||||
|
||||
/**
 * Score calculator for non conjunction queries (not all subqueries are required)
 *
 * Returns 0 if a prohibited subquery matches the document or a required one
 * doesn't. Otherwise the scores of the matching subqueries are summed and
 * multiplied by the coord factor for the number of matches and by this
 * query's boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        $this->_coord = array();

        // Only non-prohibited subqueries can contribute to the overlap
        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                continue;
            }
            $maxCoord++;
        }

        // Precalculate the coord factor for every possible number of matches
        for ($count = 0; $count <= $maxCoord; $count++) {
            $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
        }
    }

    $score             = 0;
    $matchedSubqueries = 0;

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $subscore = $subquery->score($docId, $reader);
        $isMatch  = ($subscore != 0);
        $sign     = $this->_signs[$subqueryId];

        if ($sign === false && $isMatch) {
            // prohibited, but matches
            return 0;
        }

        if ($sign === true && !$isMatch) {
            // required, but doesn't match
            return 0;
        }

        if ($isMatch) {
            $matchedSubqueries++;
            $score += $subscore;
        }
    }

    return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
}
|
||||
|
||||
/**
 * Execute query in context of index reader
 * It also initializes necessary internal structures
 *
 * Required subqueries are executed with the shared documents filter,
 * all other subqueries are executed without a filter. Afterwards the
 * result vector is calculated.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // Initialize weight if it's not done yet
    $this->_initWeight($reader);

    if ($docsFilter === null) {
        // Create local documents filter if it's not provided by upper query
        require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
        $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    }

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        // Strict NULL check, consistent with every other sign test in this class
        if ($this->_signs === null  ||  $this->_signs[$subqueryId] === true) {
            // Subquery is required
            $subquery->execute($reader, $docsFilter);
        } else {
            $subquery->execute($reader);
        }
    }

    if ($this->_signs === null) {
        $this->_calculateConjunctionResult();
    } else {
        $this->_calculateNonConjunctionResult();
    }
}
|
||||
|
||||
|
||||
|
||||
/**
 * Get document ids likely matching the query
 *
 * Returns the stored result vector: an array with document ids as keys
 * (performance considerations).
 *
 * @return array
 */
public function matchedDocs()
{
    $resultVector = $this->_resVector;

    return $resultVector;
}
|
||||
|
||||
/**
 * Score specified document
 *
 * Documents missing from the result vector score 0. Other documents are
 * scored by the conjunction or the non-conjunction calculator, depending
 * on whether signs are stored.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    if ($this->_signs === null) {
        // All subqueries are required
        return $this->_conjunctionScore($docId, $reader);
    }

    return $this->_nonConjunctionScore($docId, $reader);
}
|
||||
|
||||
/**
 * Return query terms
 *
 * Collects the terms of all subqueries except the prohibited ones.
 *
 * @return array
 */
public function getQueryTerms()
{
    $collected = array();

    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs !== null  &&  $this->_signs[$id] === false) {
            // Terms of prohibited subqueries are not reported
            continue;
        }

        $collected = array_merge($collected, $subquery->getQueryTerms());
    }

    return $collected;
}
|
||||
|
||||
/**
 * Query specific matches highlighting
 *
 * Delegates highlighting to every subquery that is not prohibited.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs !== null  &&  $this->_signs[$id] === false) {
            // Prohibited subquery: nothing to highlight
            continue;
        }

        $subquery->_highlightMatches($highlighter);
    }
}
|
||||
|
||||
/**
 * Print a query
 *
 * Each subquery is printed in parentheses, prefixed with '+' when required
 * and '-' when prohibited; the parts are separated by single spaces. If the
 * boost is not 1, the whole string is wrapped and suffixed with '^<boost>'.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping

    $parts = array();

    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs === null || $this->_signs[$id] === true) {
            $prefix = '+';
        } else if ($this->_signs[$id] === false) {
            $prefix = '-';
        } else {
            $prefix = '';
        }

        $parts[] = $prefix . '(' . $subquery->__toString() . ')';
    }

    // Joining the parts (instead of testing for key 0) avoids a leading space
    // when the subqueries array doesn't start at key 0
    $query = implode(' ', $parts);

    if ($this->getBoost() != 1) {
        $query = '(' . $query . ')^' . round($this->getBoost(), 4);
    }

    return $query;
}
|
||||
}
|
||||
|
138
thirdparty/Zend/Search/Lucene/Search/Query/Empty.php
vendored
Normal file
138
thirdparty/Zend/Search/Lucene/Search/Query/Empty.php
vendored
Normal file
@ -0,0 +1,138 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Empty.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Empty extends Zend_Search_Lucene_Search_Query
{
    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * This query is returned unchanged.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        return $this;
    }

    /**
     * Optimize query in the context of specified index
     *
     * "Empty" query is a primitive query and doesn't need to be optimized,
     * so it is returned unchanged.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        return $this;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';

        $weight = new Zend_Search_Lucene_Search_Weight_Empty();

        return $weight;
    }

    /**
     * Execute query in context of index reader
     *
     * Does nothing for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        // Do nothing
    }

    /**
     * Get document ids likely matching the query
     *
     * Always an empty array for this query.
     *
     * @return array
     */
    public function matchedDocs()
    {
        $noDocuments = array();

        return $noDocuments;
    }

    /**
     * Score specified document
     *
     * Always 0 for this query.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        return 0;
    }

    /**
     * Return query terms
     *
     * Always an empty array for this query.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        $noTerms = array();

        return $noTerms;
    }

    /**
     * Query specific matches highlighting
     *
     * Does nothing for this query.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        // Do nothing
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        return '<EmptyQuery>';
    }
}
|
||||
|
493
thirdparty/Zend/Search/Lucene/Search/Query/Fuzzy.php
vendored
Normal file
493
thirdparty/Zend/Search/Lucene/Search/Query/Fuzzy.php
vendored
Normal file
@ -0,0 +1,493 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Fuzzy.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
|
||||
{
|
||||
/** Default minimum similarity */
|
||||
const DEFAULT_MIN_SIMILARITY = 0.5;
|
||||
|
||||
/**
|
||||
* Maximum number of matched terms.
|
||||
* Apache Lucene defines this limitation as boolean query maximum number of clauses:
|
||||
* org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
|
||||
*/
|
||||
const MAX_CLAUSE_COUNT = 1024;
|
||||
|
||||
/**
|
||||
* Array of precalculated max distances
|
||||
*
|
||||
* keys are integers representing a word size
|
||||
*/
|
||||
private $_maxDistances = array();
|
||||
|
||||
/**
|
||||
* Base searching term.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Index_Term
|
||||
*/
|
||||
private $_term;
|
||||
|
||||
/**
|
||||
* A value between 0 and 1 to set the required similarity
|
||||
* between the query term and the matching terms. For example, for a
|
||||
* _minimumSimilarity of 0.5 a term of the same length
|
||||
* as the query term is considered similar to the query term if the edit distance
|
||||
* between both terms is less than length(term)*0.5
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
private $_minimumSimilarity;
|
||||
|
||||
/**
|
||||
* The length of common (non-fuzzy) prefix
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_prefixLength;
|
||||
|
||||
/**
|
||||
* Matched terms.
|
||||
*
|
||||
* Matched terms list.
|
||||
* It's filled during the search (rewrite operation) and may be used for search result
|
||||
* post-processing
|
||||
*
|
||||
* Array of Zend_Search_Lucene_Index_Term objects
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_matches = null;
|
||||
|
||||
/**
|
||||
* Matched terms scores
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_scores = null;
|
||||
|
||||
/**
|
||||
* Array of the term keys.
|
||||
* Used to sort terms in alphabetical order if terms have the same scores
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termKeys = null;
|
||||
|
||||
/**
|
||||
* Default non-fuzzy prefix length
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private static $_defaultPrefixLength = 3;
|
||||
|
||||
/**
 * Zend_Search_Lucene_Search_Query_Fuzzy constructor.
 *
 * Validates the arguments in this order: $minimumSimilarity must not be
 * less than 0, must be less than 1, and $prefixLength must not be less
 * than 0. When $prefixLength is NULL the class default prefix length is used.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param float   $minimumSimilarity
 * @param integer $prefixLength
 * @throws Zend_Search_Lucene_Exception
 */
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
{
    $problem = null;

    if ($minimumSimilarity < 0) {
        $problem = 'minimumSimilarity cannot be less than 0';
    } else if ($minimumSimilarity >= 1) {
        $problem = 'minimumSimilarity cannot be greater than or equal to 1';
    } else if ($prefixLength < 0) {
        $problem = 'prefixLength cannot be less than 0';
    }

    if ($problem !== null) {
        // The exception class is loaded only when it is actually needed
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($problem);
    }

    $this->_term              = $term;
    $this->_minimumSimilarity = $minimumSimilarity;

    if ($prefixLength !== null) {
        $this->_prefixLength = $prefixLength;
    } else {
        $this->_prefixLength = self::$_defaultPrefixLength;
    }
}
|
||||
|
||||
/**
 * Get default non-fuzzy prefix length
 *
 * This is the value the constructor uses when no prefix length is given.
 *
 * @return integer
 */
public static function getDefaultPrefixLength()
{
    $defaultPrefixLength = self::$_defaultPrefixLength;

    return $defaultPrefixLength;
}
|
||||
|
||||
/**
 * Set default non-fuzzy prefix length
 *
 * The value is stored without validation. It is used by the constructor
 * when no prefix length is given.
 *
 * @param integer $defaultPrefixLength
 */
public static function setDefaultPrefixLength($defaultPrefixLength)
{
    self::$_defaultPrefixLength = $defaultPrefixLength;
}
|
||||
|
||||
/**
 * Calculate maximum distance for specified word length
 *
 * The result is (1 - minimum similarity) * (min($termLength, $length) + $prefixLength),
 * truncated to an integer. It is also cached in $this->_maxDistances under $length.
 *
 * @param integer $prefixLength
 * @param integer $termLength
 * @param integer $length
 * @return integer
 */
private function _calculateMaxDistance($prefixLength, $termLength, $length)
{
    $shorterLength = min($termLength, $length);
    $maxDistance   = (int) ((1 - $this->_minimumSimilarity)*($shorterLength + $prefixLength));

    $this->_maxDistances[$length] = $maxDistance;

    return $maxDistance;
}
|
||||
|
||||
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Walks the terms dictionary of every relevant field, keeps the terms whose
 * similarity to the query term is above the configured minimum, and returns:
 *  - an empty query when nothing matched,
 *  - a plain term query when exactly one term matched,
 *  - a boolean query of boosted term queries otherwise (best scores first,
 *    at most MAX_CLAUSE_COUNT clauses).
 *
 * The matched terms are also kept in $this->_matches for getQueryTerms().
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception if the terms per query limit is exceeded
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $this->_matches  = array();
    $this->_scores   = array();
    $this->_termKeys = array();

    if ($this->_term->field === null) {
        // Search through all fields
        $fields = $index->getFieldNames(true /* indexed fields list */);
    } else {
        $fields = array($this->_term->field);
    }

    require_once 'Zend/Search/Lucene/Index/Term.php';
    $prefix           = (string) Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
    $prefixByteLength = strlen($prefix);
    $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);

    // The remainder is measured in bytes since levenshtein() is not UTF-8 compatible
    $termRest       = (string) substr($this->_term->text, $prefixByteLength);
    $termRestLength = strlen($termRest);

    // Maps a similarity in (minimumSimilarity, 1] onto a boost in (0, 1]
    $scaleFactor = 1/(1 - $this->_minimumSimilarity);

    require_once 'Zend/Search/Lucene.php';
    $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();

    foreach ($fields as $field) {
        $index->resetTermsStream();

        // With an empty prefix this positions the stream at the first term of the
        // field, so one loop serves both the prefixed and the prefix-less case.
        $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));

        while (($current = $index->currentTerm()) !== null
               && $current->field == $field
               && strncmp($current->text, $prefix, $prefixByteLength) === 0) {
            // Calculate similarity
            $target       = (string) substr($current->text, $prefixByteLength);
            $targetLength = strlen($target);

            $maxDistance = isset($this->_maxDistances[$targetLength])
                         ? $this->_maxDistances[$targetLength]
                         : $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);

            if ($termRestLength == 0) {
                // Nothing left to compare: the candidate is the prefix plus extra
                // characters. Without a prefix there is no basis for similarity,
                // which also avoids dividing by zero.
                $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $targetLength/$prefixUtf8Length;
            } else if ($targetLength == 0) {
                $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $termRestLength/$prefixUtf8Length;
            } else if ($maxDistance < abs($termRestLength - $targetLength)) {
                // The edit distance can never be smaller than the difference in length
                // (e.g. "pre" vs "prefixes" needs at least abs(3-8) = 5 edits), so the
                // candidate can be discarded without running levenshtein().
                $similarity = 0;
            } else {
                $distance = levenshtein($termRest, $target);

                // Older PHP versions return -1 when an argument is too long;
                // treat that as "not similar" instead of a better-than-perfect match.
                $similarity = ($distance < 0)
                            ? 0
                            : 1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
            }

            if ($similarity > $this->_minimumSimilarity) {
                $this->_matches[]  = $current;
                $this->_termKeys[] = $current->key();
                $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;

                // A limit of 0 means "no limit"
                if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                    require_once 'Zend/Search/Lucene/Exception.php';
                    throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                }
            }

            $index->nextTerm();
        }

        $index->closeTermsStream();
    }

    if (count($this->_matches) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    } else if (count($this->_matches) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
    } else {
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();

        // Best scores first; ties are ordered by term key. All three arrays are
        // reordered together so index $id refers to the same term in each of them.
        array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
                        $this->_termKeys, SORT_ASC,  SORT_STRING,
                        $this->_matches);

        $termCount = 0;
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        foreach ($this->_matches as $id => $matchedTerm) {
            $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
            $subquery->setBoost($this->_scores[$id]);

            $rewrittenQuery->addSubquery($subquery);

            $termCount++;
            if ($termCount >= self::MAX_CLAUSE_COUNT) {
                break;
            }
        }

        return $rewrittenQuery;
    }
}
|
||||
|
||||
/**
 * Optimization is not available on a raw fuzzy query.
 *
 * This method never returns; it always raises an exception telling the
 * caller to rewrite the query first.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception always
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Return the index terms collected by the last rewrite.
 *
 * @return array Matched Zend_Search_Lucene_Index_Term objects
 * @throws Zend_Search_Lucene_Exception if no matches have been computed yet
 */
public function getQueryTerms()
{
    if ($this->_matches !== null) {
        return $this->_matches;
    }

    // Matches are only populated by rewrite(); without it there is nothing to report
    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
}
|
||||
|
||||
/**
 * Weight construction is not available on a raw fuzzy query.
 *
 * This method never returns; it always raises an exception telling the
 * caller to rewrite the query first.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 * @throws Zend_Search_Lucene_Exception always
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
|
||||
/**
 * Execution is not available on a raw fuzzy query.
 *
 * This method never returns; it always raises an exception telling the
 * caller to rewrite the query first.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @throws Zend_Search_Lucene_Exception always
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Matched document ids are not available on a raw fuzzy query.
 *
 * This method never returns; it always raises an exception telling the
 * caller to rewrite the query first.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception always
 */
public function matchedDocs()
{
    $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Scoring is not available on a raw fuzzy query.
 *
 * This method never returns; it always raises an exception telling the
 * caller to rewrite the query first.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 * @throws Zend_Search_Lucene_Exception always
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Query specific matches highlighting
 *
 * Tokenizes the 'body' field of the highlighter's document with the default
 * analyzer, collects every token whose similarity to the query term is above
 * the configured minimum, and passes the collected words to the highlighter.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    $words = array();

    require_once 'Zend/Search/Lucene/Index/Term.php';
    $prefix           = (string) Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
    $prefixByteLength = strlen($prefix);
    $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);

    // The remainder is measured in bytes since levenshtein() is not UTF-8 compatible
    $termRest       = (string) substr($this->_term->text, $prefixByteLength);
    $termRestLength = strlen($termRest);

    $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');

    foreach ($tokens as $token) {
        $termText = $token->getTermText();

        // Only tokens sharing the exact (byte-wise) non-fuzzy prefix are candidates
        if (strncmp($termText, $prefix, $prefixByteLength) !== 0) {
            continue;
        }

        // Calculate similarity
        $target       = (string) substr($termText, $prefixByteLength);
        $targetLength = strlen($target);

        $maxDistance = isset($this->_maxDistances[$targetLength])
                     ? $this->_maxDistances[$targetLength]
                     : $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);

        if ($termRestLength == 0) {
            // Nothing left to compare: the token is the prefix plus extra characters
            $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $targetLength/$prefixUtf8Length;
        } else if ($targetLength == 0) {
            $similarity = ($prefixUtf8Length == 0) ? 0 : 1 - $termRestLength/$prefixUtf8Length;
        } else if ($maxDistance < abs($termRestLength - $targetLength)) {
            // The edit distance can never be smaller than the difference in length
            // (e.g. "pre" vs "prefixes" needs at least abs(3-8) = 5 edits), so the
            // token can be discarded without running levenshtein().
            $similarity = 0;
        } else {
            $distance = levenshtein($termRest, $target);

            // Older PHP versions return -1 when an argument is too long;
            // treat that as "not similar" instead of a better-than-perfect match.
            $similarity = ($distance < 0)
                        ? 0
                        : 1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
        }

        if ($similarity > $this->_minimumSimilarity) {
            $words[] = $termText;
        }
    }

    $highlighter->highlight($words);
}
|
||||
|
||||
/**
 * Render the query as text: [field:]text~[similarity][^boost].
 *
 * The similarity is only printed when it differs from the default and the
 * boost only when it differs from 1; both are rounded to 4 decimals.
 * The output is meant for visualisation, so no characters are escaped.
 *
 * @return string
 */
public function __toString()
{
    $rendered = '';

    if ($this->_term->field !== null) {
        $rendered .= $this->_term->field . ':';
    }

    $rendered .= $this->_term->text . '~';

    if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
        $rendered .= round($this->_minimumSimilarity, 4);
    }

    if ($this->getBoost() != 1) {
        $rendered .= '^' . round($this->getBoost(), 4);
    }

    return $rendered;
}
|
||||
}
|
||||
|
139
thirdparty/Zend/Search/Lucene/Search/Query/Insignificant.php
vendored
Normal file
139
thirdparty/Zend/Search/Lucene/Search/Query/Insignificant.php
vendored
Normal file
@ -0,0 +1,139 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Insignificant.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* The insignificant query returns empty result, but doesn't limit result set as a part of other queries
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_Search_Query
{
    /**
     * Rewriting leaves this query untouched.
     *
     * @param Zend_Search_Lucene_Interface $index Unused
     * @return Zend_Search_Lucene_Search_Query This same instance
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        return $this;
    }

    /**
     * Optimizing leaves this query untouched.
     *
     * @param Zend_Search_Lucene_Interface $index Unused
     * @return Zend_Search_Lucene_Search_Query This same instance
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        return $this;
    }

    /**
     * Provide the weight object for this query, which is always the empty weight.
     *
     * @param Zend_Search_Lucene_Interface $reader Unused
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';

        $emptyWeight = new Zend_Search_Lucene_Search_Weight_Empty();

        return $emptyWeight;
    }

    /**
     * Executing this query performs no work and builds no internal state.
     *
     * @param Zend_Search_Lucene_Interface $reader Unused
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Unused
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        // Intentionally empty
    }

    /**
     * Report the documents matched by this query: there are none.
     *
     * @return array Always an empty array
     */
    public function matchedDocs()
    {
        return array();
    }

    /**
     * Score a document against this query; the score is always zero.
     *
     * @param integer $docId Unused
     * @param Zend_Search_Lucene_Interface $reader Unused
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        return 0;
    }

    /**
     * Report the terms used by this query: there are none.
     *
     * @return array Always an empty array
     */
    public function getQueryTerms()
    {
        return array();
    }

    /**
     * Highlighting hook; this query has nothing to highlight.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        // Intentionally empty
    }

    /**
     * Render the query as a fixed placeholder string.
     *
     * @return string
     */
    public function __toString()
    {
        return '<InsignificantQuery>';
    }
}
|
||||
|
668
thirdparty/Zend/Search/Lucene/Search/Query/MultiTerm.php
vendored
Normal file
668
thirdparty/Zend/Search/Lucene/Search/Query/MultiTerm.php
vendored
Normal file
@ -0,0 +1,668 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: MultiTerm.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
|
||||
{
|
||||
|
||||
/**
|
||||
* Terms to find.
|
||||
* Array of Zend_Search_Lucene_Index_Term
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_terms = array();
|
||||
|
||||
/**
|
||||
* Term signs.
|
||||
* If true then term is required.
|
||||
* If false then term is prohibited.
|
||||
* If null then term is neither prohibited, nor required
|
||||
*
|
||||
* If array is null then all terms are required
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_signs;
|
||||
|
||||
/**
|
||||
* Result vector.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_resVector = null;
|
||||
|
||||
/**
|
||||
* Terms positions vectors.
|
||||
* Array of Arrays:
|
||||
* term1Id => (docId => freq, ...)
|
||||
* term2Id => (docId => freq, ...)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termsFreqs = array();
|
||||
|
||||
|
||||
/**
|
||||
* A score factor based on the fraction of all query terms
|
||||
* that a document contains.
|
||||
* float for conjunction queries
|
||||
* array of float for non conjunction queries
|
||||
*
|
||||
* @var mixed
|
||||
*/
|
||||
private $_coord = null;
|
||||
|
||||
|
||||
/**
|
||||
* Terms weights
|
||||
* array of Zend_Search_Lucene_Search_Weight
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_weights = array();
|
||||
|
||||
|
||||
/**
 * Class constructor.  Create a new multi-term query object.
 *
 * When $terms is not an array the query starts out empty and terms can be
 * added later through addTerm().
 *
 * If the $signs array is omitted, or every entry in it is TRUE, all terms are
 * required and no signs array is stored (the signs stay NULL). This differs
 * from the addTerm() default, where an omitted sign means "optional".
 *
 * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
 * @param array $signs    Array of signs.  Sign is boolean|null.
 * @throws Zend_Search_Lucene_Exception if the number of terms exceeds the terms per query limit
 */
public function __construct($terms = null, $signs = null)
{
    if (!is_array($terms)) {
        return;
    }

    require_once 'Zend/Search/Lucene.php';
    $termsLimit = Zend_Search_Lucene::getTermsPerQueryLimit();

    // A limit of 0 means "no limit" (the same convention the fuzzy query applies
    // when it checks this limit), so it must not reject every non-empty term list.
    if ($termsLimit != 0 && count($terms) > $termsLimit) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
    }

    $this->_terms = $terms;

    $this->_signs = null;
    // Keep the signs only if at least one term is not required
    if (is_array($signs)) {
        foreach ($signs as $sign ) {
            if ($sign !== true) {
                $this->_signs = $signs;
                break;
            }
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Append a term to this query together with its sign.
 *
 * Sign values:
 *     TRUE  - term is required
 *     FALSE - term is prohibited
 *     NULL  - term is neither prohibited, nor required
 *
 * While every term added so far is required, no signs array is kept at all.
 * The first non-required term materialises it, marking all earlier terms as
 * required.
 *
 * @param  Zend_Search_Lucene_Index_Term $term
 * @param  boolean|null $sign
 * @return void
 */
public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
    $signsNotTracked = ($this->_signs === null);

    // Nothing to record when this term is required and all previous ones are too
    $mustRecordSign = !($sign === true && $signsNotTracked);

    if ($mustRecordSign) {
        if ($signsNotTracked) {
            // Back-fill: every term added before this point was required
            $backfilled = array();
            foreach ($this->_terms as $earlierTerm) {
                $backfilled[] = true;
            }
            $this->_signs = $backfilled;
        }

        $this->_signs[] = $sign;
    }

    $this->_terms[] = $term;
}
|
||||
|
||||
|
||||
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * - No terms: an empty query is returned.
 * - Every term names a field: the query is returned unchanged.
 * - Otherwise: a boolean query is built, carrying this query's boost, with one
 *   rewritten term subquery per term. Each subquery keeps the term's sign, or
 *   is marked required when no signs are stored.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Look for a term that is not bound to a specific field
    $hasUnqualifiedTerm = false;
    foreach ($this->_terms as $candidate) {
        if ($candidate->field === null) {
            $hasUnqualifiedTerm = true;
            break;
        }
    }

    if (!$hasUnqualifiedTerm) {
        return $this;
    }

    // Transform the multi-term query to a boolean one and rewrite() each subquery
    require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    require_once 'Zend/Search/Lucene/Search/Query/Term.php';
    foreach ($this->_terms as $termId => $term) {
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$termId];
        }

        $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);

        $booleanQuery->addSubquery($termQuery->rewrite($index), $sign);
    }

    return $booleanQuery;
}
|
||||
|
||||
/**
 * Optimize query in the context of specified index
 *
 * Terms missing from the index are handled first: a missing required term
 * makes the whole query empty, a missing optional or prohibited term is
 * dropped. The result is then:
 *  - an empty query if only prohibited terms remain or no terms remain,
 *  - a single term query (with this query's boost) if one term remains,
 *  - a new multi-term query (with this query's boost) otherwise.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $remainingTerms = $this->_terms;
    $remainingSigns = $this->_signs;

    foreach ($remainingTerms as $id => $term) {
        if ($index->hasTerm($term)) {
            continue;
        }

        if ($remainingSigns === null || $remainingSigns[$id] === true) {
            // A required term is absent from the index: nothing can match
            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Optional or prohibited term that is absent: drop it from both lists
        unset($remainingTerms[$id]);
        unset($remainingSigns[$id]);
    }

    // Without a signs list every term is required, so they cannot all be prohibited
    $onlyProhibited = ($remainingSigns !== null);
    if ($onlyProhibited) {
        foreach ($remainingSigns as $sign) {
            if ($sign !== false) {
                $onlyProhibited = false;
                break;
            }
        }
    }

    if ($onlyProhibited) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    /**
     * @todo make an optimization for repeated terms
     *       (they may have different signs)
     */

    $remainingCount = count($remainingTerms);

    if ($remainingCount == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($remainingCount == 1) {
        // The single remaining term is known not to be prohibited (checked above),
        // so it is a required or optional element and a plain term query suffices.
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $singleTermQuery = new Zend_Search_Lucene_Search_Query_Term(reset($remainingTerms));
        $singleTermQuery->setBoost($this->getBoost());

        return $singleTermQuery;
    }

    $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($remainingTerms, $remainingSigns);
    $multiTermQuery->setBoost($this->getBoost());

    return $multiTermQuery;
}
|
||||
|
||||
|
||||
/**
 * Return the terms of this query.
 *
 * @return array Array of Zend_Search_Lucene_Index_Term objects
 */
public function getTerms()
{
    $terms = $this->_terms;

    return $terms;
}
|
||||
|
||||
|
||||
/**
 * Return the signs of this query's terms.
 *
 * @return array|null Signs indexed like the terms; NULL when no signs are
 *                    stored, which means every term is required
 */
public function getSigns()
{
    $signs = $this->_signs;

    return $signs;
}
|
||||
|
||||
|
||||
/**
 * Store the weight object for one term.
 *
 * @param integer $num Key under which the weight is stored; the scoring code
 *                     looks weights up by term id
 * @param Zend_Search_Lucene_Search_Weight_Term $weight
 */
public function setWeight($num, $weight)
{
    // Replaces any weight previously stored under the same key
    $this->_weights[$num] = $weight;
}
|
||||
|
||||
|
||||
/**
 * Build the multi-term weight for this query, remember it and return it.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';

    $multiTermWeight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
    $this->_weight   = $multiTermWeight;

    return $multiTermWeight;
}
|
||||
|
||||
|
||||
/**
 * Calculate result vector for Conjunction query
 * (like '+something +another')
 *
 * Terms are processed from the least to the most frequent one, all sharing a
 * single docs filter. The docs list returned for the last term becomes the
 * result vector, and the term frequencies are then loaded through that same
 * filter.
 *
 * Note: $this->_terms is reordered (and re-indexed) by this method.
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    $this->_resVector = null;

    if (count($this->_terms) == 0) {
        // Nothing to intersect. Return right away: continuing would reach
        // array_flip() with no docs list ever retrieved.
        $this->_resVector = array();
        return;
    }

    // Order terms by selectivity
    $docFreqs = array();
    $ids      = array();
    foreach ($this->_terms as $id => $term) {
        $docFreqs[] = $reader->docFreq($term);
        $ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
    }
    array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
                    $ids,      SORT_ASC, SORT_NUMERIC,
                    $this->_terms);

    require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
    $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();

    $termDocs = array();
    foreach ($this->_terms as $termId => $term) {
        $termDocs = $reader->termDocs($term, $docsFilter);
    }
    // Treat last retrieved docs vector as a result set
    // (filter collects data for other terms)
    $this->_resVector = array_flip($termDocs);

    foreach ($this->_terms as $termId => $term) {
        $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
    }

    // ksort($this->_resVector, SORT_NUMERIC);
    // Docs are returned ordered. The algorithms used don't change the order of elements.
}
|
||||
|
||||
|
||||
/**
 * Calculate result vector for non Conjunction query
 * (like '+something -another')
 *
 * Each term's documents are sorted into one of three groups by its sign:
 * required, prohibited or optional. The result is the intersection of all
 * required groups if there is at least one required term, otherwise the
 * union of the optional ones. Prohibited documents are then removed and the
 * result is sorted by document id. Term frequencies are stored per term.
 *
 * @param Zend_Search_Lucene_Interface $reader
 */
private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    $requiredVectors      = array();
    $requiredVectorsSizes = array();
    $requiredVectorsIds   = array(); // tie-breaker for the sort below, prevents array comparison

    $optionalDocs   = array();
    $prohibitedDocs = array();

    foreach ($this->_terms as $termId => $term) {
        // Flip so that document ids become keys
        $docsOfTerm = array_flip($reader->termDocs($term));
        $sign       = $this->_signs[$termId];

        if ($sign === true) {
            // required
            $requiredVectors[]      = $docsOfTerm;
            $requiredVectorsSizes[] = count($docsOfTerm);
            $requiredVectorsIds[]   = $termId;
        } elseif ($sign === false) {
            // prohibited: array union
            $prohibitedDocs += $docsOfTerm;
        } else {
            // neither required, nor prohibited: array union
            $optionalDocs += $docsOfTerm;
        }

        $this->_termsFreqs[$termId] = $reader->termFreqs($term);
    }

    // Smallest required vectors first, so the intersection shrinks as early as possible
    array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
                    $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
                    $requiredVectors);

    $requiredDocs = null;
    foreach ($requiredVectors as $vector) {
        if ($requiredDocs === null) {
            $requiredDocs = $vector;
        } else {
            // Hand-written intersection by key; used as a workaround for
            // array_intersect_key() slowness.
            $intersection = array();
            foreach ($requiredDocs as $docId => $value) {
                if (isset($vector[$docId])) {
                    $intersection[$docId] = $value;
                }
            }
            $requiredDocs = $intersection;
        }

        if (count($requiredDocs) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }

    $this->_resVector = ($requiredDocs !== null) ? $requiredDocs : $optionalDocs;

    if (count($prohibitedDocs) != 0) {
        // Hand-written difference by key; used as a workaround for
        // array_diff_key() slowness. Iterate over whichever side is smaller.
        $remaining = $this->_resVector;

        if (count($this->_resVector) < count($prohibitedDocs)) {
            foreach ($this->_resVector as $docId => $value) {
                if (isset($prohibitedDocs[$docId])) {
                    unset($remaining[$docId]);
                }
            }
        } else {
            foreach ($prohibitedDocs as $docId => $value) {
                unset($remaining[$docId]);
            }
        }

        $this->_resVector = $remaining;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
|
||||
|
||||
|
||||
/**
 * Score calculator for conjunction queries (all terms are required)
 *
 * Sums tf * term weight * field norm over all terms, then multiplies the
 * sum by the coordination factor and this query's boost. The coordination
 * factor is computed on first use and cached.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // All terms are required, so every matched doc overlaps with all of them
        $termsTotal   = count($this->_terms);
        $this->_coord = $reader->getSimilarity()->coord($termsTotal, $termsTotal);
    }

    $sum = 0.0;

    foreach ($this->_terms as $termId => $term) {
        // No need to check that the term frequency is not 0:
        // score calculation is performed only for matched docs
        $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weight = $this->_weights[$termId]->getValue();
        $norm   = $reader->norm($docId, $term->field);

        $sum += $tf * $weight * $norm;
    }

    return $sum * $this->_coord * $this->getBoost();
}
|
||||
|
||||
|
||||
/**
 * Score a document for a non-conjunction query (not all terms are required).
 *
 * On the first call a table of coordination factors is built and cached in
 * $this->_coord, indexed by the number of matched terms (0 .. number of
 * non-prohibited terms).
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, $reader)
{
    if ($this->_coord === null) {
        $this->_coord = array();

        // Count the terms which are not prohibited
        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                continue;
            }
            $maxCoord++;
        }

        for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
        }
    }

    $total        = 0.0;
    $matchedTerms = 0;

    foreach ($this->_terms as $termId => $term) {
        if ($this->_signs[$termId] === false) {
            // prohibited term
            continue;
        }
        if (!isset($this->_termsFreqs[$termId][$docId])) {
            // term is not matched in this document
            continue;
        }

        $matchedTerms++;

        // No zero-frequency check: scoring is performed only for matched docs
        $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weight = $this->_weights[$termId]->getValue();
        $norm   = $reader->norm($docId, $term->field);

        $total += $tf * $weight * $norm;
    }

    return $total * $this->_coord[$matchedTerms] * $this->getBoost();
}
|
||||
|
||||
/**
 * Execute query in context of index reader.
 * It also initializes necessary internal structures.
 *
 * The result vector is calculated by the conjunction routine when no signs
 * are defined, and by the non-conjunction routine otherwise.
 * $docsFilter is accepted but not used by this method body.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    if ($this->_signs !== null) {
        $this->_calculateNonConjunctionResult($reader);
    } else {
        $this->_calculateConjunctionResult($reader);
    }

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
|
||||
|
||||
/**
 * Get document ids likely matching the query.
 *
 * Returns the result vector as is: an array which has document ids as keys
 * (performance considerations).
 *
 * @return array
 */
public function matchedDocs()
{
    return $this->_resVector;
}
|
||||
|
||||
/**
 * Score specified document.
 *
 * Documents which are absent from the result vector score 0.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    if ($this->_signs === null) {
        return $this->_conjunctionScore($docId, $reader);
    }

    return $this->_nonConjunctionScore($docId, $reader);
}
|
||||
|
||||
/**
 * Return query terms.
 *
 * Without signs all terms are returned as is. Otherwise prohibited terms
 * (sign === false) are left out and the result is re-indexed from zero.
 *
 * @return array
 */
public function getQueryTerms()
{
    if ($this->_signs === null) {
        return $this->_terms;
    }

    $allowedTerms = array();

    foreach ($this->_signs as $id => $sign) {
        if ($sign === false) {
            continue;
        }
        $allowedTerms[] = $this->_terms[$id];
    }

    return $allowedTerms;
}
|
||||
|
||||
/**
 * Query specific matches highlighting.
 *
 * Passes the text of every non-prohibited term to the highlighter.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    // Select the terms which take part in highlighting
    if ($this->_signs === null) {
        $terms = $this->_terms;
    } else {
        $terms = array();
        foreach ($this->_signs as $id => $sign) {
            if ($sign !== false) {
                $terms[] = $this->_terms[$id];
            }
        }
    }

    $words = array();
    foreach ($terms as $term) {
        $words[] = $term->text;
    }

    $highlighter->highlight($words);
}
|
||||
|
||||
/**
 * Print a query.
 *
 * Terms are separated by a space; required terms get a '+' prefix, prohibited
 * terms a '-' prefix, and a field name (if any) is prepended as 'field:'.
 * A boost other than 1 is appended as '(...)^boost'.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping

    $query = '';

    foreach ($this->_terms as $id => $term) {
        $separator = ($id != 0) ? ' ' : '';

        if ($this->_signs === null || $this->_signs[$id] === true) {
            $sign = '+';
        } else if ($this->_signs[$id] === false) {
            $sign = '-';
        } else {
            $sign = '';
        }

        $field = ($term->field !== null) ? $term->field . ':' : '';

        $query .= $separator . $sign . $field . $term->text;
    }

    if ($this->getBoost() != 1) {
        $query = '(' . $query . ')^' . round($this->getBoost(), 4);
    }

    return $query;
}
|
||||
}
|
||||
|
576
thirdparty/Zend/Search/Lucene/Search/Query/Phrase.php
vendored
Normal file
576
thirdparty/Zend/Search/Lucene/Search/Query/Phrase.php
vendored
Normal file
@ -0,0 +1,576 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Phrase.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* A Query that matches documents containing a particular sequence of terms.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
|
||||
{
|
||||
/**
|
||||
* Terms to find.
|
||||
* Array of Zend_Search_Lucene_Index_Term objects.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_terms;
|
||||
|
||||
/**
|
||||
* Term positions (relative positions of terms within the phrase).
|
||||
* Array of integers
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_offsets;
|
||||
|
||||
/**
|
||||
* Sets the number of other words permitted between words in query phrase.
|
||||
* If zero, then this is an exact phrase search. For larger values this works
|
||||
* like a WITHIN or NEAR operator.
|
||||
*
|
||||
* The slop is in fact an edit-distance, where the units correspond to
|
||||
* moves of terms in the query phrase out of position. For example, to switch
|
||||
* the order of two words requires two moves (the first move places the words
|
||||
* atop one another), so to permit re-orderings of phrases, the slop must be
|
||||
* at least two.
|
||||
* More exact matches are scored higher than sloppier matches, thus search
|
||||
* results are sorted by exactness.
|
||||
*
|
||||
* The slop is zero by default, requiring exact matches.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_slop;
|
||||
|
||||
/**
|
||||
* Result vector.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_resVector = null;
|
||||
|
||||
/**
|
||||
* Terms positions vectors.
|
||||
* Array of Arrays:
|
||||
* term1Id => (docId => array( pos1, pos2, ... ), ...)
|
||||
* term2Id => (docId => array( pos1, pos2, ... ), ...)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termsPositions = array();
|
||||
|
||||
/**
 * Class constructor. Create a new phrase query.
 *
 * Terms keep the keys of the $terms array. When $offsets is omitted, each
 * term gets the next sequential position (0, 1, 2, ...) in iteration order.
 *
 * @param array  $terms    Terms to search. Array of strings.
 * @param array  $offsets  Relative term positions. Array of integers.
 * @param string $field    Field to search.
 * @throws Zend_Search_Lucene_Exception  If $terms or $offsets is neither an array nor null,
 *                                       or if their sizes differ.
 */
public function __construct($terms = null, $offsets = null, $field = null)
{
    $this->_slop = 0;

    if (is_array($terms)) {
        $this->_terms = array();
        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($terms as $termId => $termText) {
            $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
                                                        new Zend_Search_Lucene_Index_Term($termText);
        }
    } else if ($terms === null) {
        $this->_terms = array();
    } else {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
    }

    if (is_array($offsets)) {
        if (count($this->_terms) != count($offsets)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
        }
        $this->_offsets = $offsets;
    } else if ($offsets === null) {
        $this->_offsets = array();
        foreach ($this->_terms as $termId => $term) {
            // Next free sequential position
            $position = count($this->_offsets);
            $this->_offsets[$termId] = $position;
        }
    } else {
        require_once 'Zend/Search/Lucene/Exception.php';
        // Offsets are relative positions, i.e. integers (not strings)
        throw new Zend_Search_Lucene_Exception('offsets argument must be array of integers or null');
    }
}
|
||||
|
||||
/**
 * Set slop (the number of other words permitted between words in query phrase).
 *
 * @param integer $slop
 */
public function setSlop($slop)
{
    $this->_slop = $slop;
}
|
||||
|
||||
|
||||
/**
 * Get slop (the number of other words permitted between words in query phrase).
 *
 * @return integer
 */
public function getSlop()
{
    return $this->_slop;
}
|
||||
|
||||
|
||||
/**
 * Adds a term to the end of the query phrase.
 *
 * The relative position of the term is either specified explicitly or is
 * the one immediately after the last term added (0 for the very first term).
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $position
 * @throws Zend_Search_Lucene_Exception  If the term field differs from the field of the last added term.
 */
public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
    if (count($this->_terms) != 0) {
        $previousTerm = end($this->_terms);

        if ($previousTerm->field != $term->field) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
                                                   $term->field . ':' . $term->text);
        }
    }

    // Choose the relative position of the new term
    if ($position !== null) {
        $offset = $position;
    } else if (count($this->_offsets) != 0) {
        $offset = end($this->_offsets) + 1;
    } else {
        $offset = 0;
    }

    $this->_terms[]   = $term;
    $this->_offsets[] = $offset;
}
|
||||
|
||||
|
||||
/**
 * Re-write query into primitive queries in the context of specified index.
 *
 * - A phrase without terms is rewritten into an empty query.
 * - A phrase whose terms are already bound to a field is returned as is.
 * - Otherwise a boolean query is built which contains one phrase subquery
 *   per field name returned by $index->getFieldNames(true).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // The constructor keeps caller-supplied keys for terms, so key 0 is not
    // guaranteed to exist. Take the first term regardless of its key.
    $firstTerm = reset($this->_terms);

    if ($firstTerm->field !== null) {
        return $this;
    }

    require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    $query->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $fieldName) {
        $subquery = new Zend_Search_Lucene_Search_Query_Phrase();
        $subquery->setSlop($this->getSlop());

        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($this->_terms as $termId => $term) {
            $qualifiedTerm = new Zend_Search_Lucene_Index_Term($term->text, $fieldName);

            $subquery->addTerm($qualifiedTerm, $this->_offsets[$termId]);
        }

        $query->addSubquery($subquery);
    }

    return $query;
}
|
||||
|
||||
/**
 * Optimize query in the context of specified index.
 *
 * Returns an empty query if the phrase has no terms or if any of its terms
 * is missing from the index, a term query (with the same boost) for a
 * single-term phrase, and the phrase query itself otherwise.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $termsCount = count($this->_terms);

    if ($termsCount == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Check, that index contains all phrase terms
    foreach ($this->_terms as $term) {
        if ($index->hasTerm($term)) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($termsCount == 1) {
        // It's one term query
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
        $optimizedQuery->setBoost($this->getBoost());

        return $optimizedQuery;
    }

    return $this;
}
|
||||
|
||||
/**
 * Returns query terms.
 *
 * @return array  Array of Zend_Search_Lucene_Index_Term objects
 */
public function getTerms()
{
    return $this->_terms;
}
|
||||
|
||||
|
||||
/**
 * Set weight for specified term.
 *
 * Stores $weight in $this->_weights under the key $num.
 * NOTE(review): $_weights is not declared among this class's own
 * properties - confirm that it is provided by the parent class.
 *
 * @param integer $num
 * @param Zend_Search_Lucene_Search_Weight_Term $weight
 */
public function setWeight($num, $weight)
{
    $this->_weights[$num] = $weight;
}
|
||||
|
||||
|
||||
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * The created phrase weight is also stored in $this->_weight.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';

    $phraseWeight  = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
    $this->_weight = $phraseWeight;

    return $phraseWeight;
}
|
||||
|
||||
|
||||
/**
 * Frequency calculator for exact phrase queries (terms sequence is fixed).
 *
 * Counts how many times the whole phrase occurs in the document: every
 * position of the rarest term is taken as an anchor, and all other terms
 * must be present at the positions implied by their relative offsets.
 *
 * @param integer $docId
 * @return integer  Number of phrase occurrences within the document
 */
public function _exactPhraseFreq($docId)
{
    // Find the term with the lowest cardinality (fewest positions in the document)
    $lowCardTermId = null;

    foreach ($this->_terms as $termId => $term) {
        if ($lowCardTermId === null) {
            $lowCardTermId = $termId;
            continue;
        }

        if (count($this->_termsPositions[$termId][$docId]) <
            count($this->_termsPositions[$lowCardTermId][$docId])) {
            $lowCardTermId = $termId;
        }
    }

    $freq = 0;

    // Walk through positions of the term with lowest cardinality
    foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
        $phraseFound = true;

        // Walk through other terms
        foreach ($this->_terms as $termId => $term) {
            if ($termId == $lowCardTermId) {
                continue;
            }

            $offsetShift      = $this->_offsets[$termId] - $this->_offsets[$lowCardTermId];
            $expectedPosition = $lowCardPos + $offsetShift;

            if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
                // Phrase wasn't found at this anchor position
                $phraseFound = false;
                break;
            }
        }

        if ($phraseFound) {
            $freq++;
        }
    }

    return $freq;
}
|
||||
|
||||
/**
 * Frequency calculator for sloppy phrase queries.
 *
 * First, candidate phrases (termId => position maps) are built from the
 * term positions within the document. Then, for each candidate, the minimal
 * total displacement over all start shifts in [-slop, slop] is calculated,
 * and candidates within the slop contribute sloppyFreq(distance) to the result.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
{
    $freq = 0;

    // Queue of candidate phrases; it starts with a single empty phrase
    $phraseQueue = array(array());
    $lastTerm    = null;

    // Walk through the terms to create phrases.
    foreach ($this->_terms as $termId => $term) {
        // Only phrases existing before this term is processed are extended
        $queueSize       = count($phraseQueue);
        $isFirstPosition = true;

        // Walk through the term positions.
        // Each term position produces a set of phrases.
        foreach ($this->_termsPositions[$termId][$docId] as $termPosition) {
            if ($isFirstPosition) {
                // The first position is written into the existing phrases in place
                for ($phraseId = 0; $phraseId < $queueSize; $phraseId++) {
                    $phraseQueue[$phraseId][$termId] = $termPosition;
                }
            } else {
                // Other positions fork copies of the existing phrases
                for ($phraseId = 0; $phraseId < $queueSize; $phraseId++) {
                    if ($lastTerm !== null) {
                        $actualGap   = $termPosition - $phraseQueue[$phraseId][$lastTerm];
                        $expectedGap = $this->_offsets[$termId] - $this->_offsets[$lastTerm];

                        if (abs($actualGap - $expectedGap) > $this->_slop) {
                            continue;
                        }
                    }

                    $forkedPhrase          = $phraseQueue[$phraseId];
                    $forkedPhrase[$termId] = $termPosition;
                    $phraseQueue[]         = $forkedPhrase;
                }
            }

            $isFirstPosition = false;
        }

        $lastTerm = $termId;
    }

    foreach ($phraseQueue as $phrasePos) {
        $minDistance = null;

        for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
            $start    = reset($phrasePos) - reset($this->_offsets) + $shift;
            $distance = 0;

            foreach ($this->_terms as $termId => $term) {
                $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);

                if ($distance > $this->_slop) {
                    // Already too far, no need to accumulate further
                    break;
                }
            }

            if ($minDistance === null || $distance < $minDistance) {
                $minDistance = $distance;
            }
        }

        if ($minDistance <= $this->_slop) {
            $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
        }
    }

    return $freq;
}
|
||||
|
||||
/**
 * Execute query in context of index reader.
 * It also initializes necessary internal structures.
 *
 * Collects the documents and positions of each term, then intersects the
 * per-term document sets (smallest first) into the result vector.
 * $docsFilter is accepted but not used by this method body.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // A query without terms matches nothing
    $this->_resVector = (count($this->_terms) == 0) ? array() : null;

    $resVectors      = array();
    $resVectorsSizes = array();
    $resVectorsIds   = array(); // is used to prevent arrays comparison

    foreach ($this->_terms as $termId => $term) {
        $termDocs = array_flip($reader->termDocs($term));

        $resVectors[]      = $termDocs;
        $resVectorsSizes[] = count($termDocs);
        $resVectorsIds[]   = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // sort resvectors in order of subquery cardinality increasing
    array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
                    $resVectorsIds,   SORT_ASC, SORT_NUMERIC,
                    $resVectors);

    foreach ($resVectors as $nextResVector) {
        if ($this->_resVector === null) {
            $this->_resVector = $nextResVector;
        } else {
            // Manual intersection by keys is used as a workaround
            // for array_intersect_key() slowness problem.
            $intersection = array();
            foreach ($this->_resVector as $id => $value) {
                if (!isset($nextResVector[$id])) {
                    continue;
                }
                $intersection[$id] = $value;
            }
            $this->_resVector = $intersection;
        }

        if (count($this->_resVector) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }

    // ksort($this->_resVector, SORT_NUMERIC);
    // Docs are returned ordered. Used algorithm doesn't change elements order.

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
|
||||
|
||||
/**
 * Get document ids likely matching the query.
 *
 * Returns the result vector as is: an array which has document ids as keys
 * (performance considerations).
 *
 * @return array
 */
public function matchedDocs()
{
    return $this->_resVector;
}
|
||||
|
||||
/**
 * Score specified document.
 *
 * Returns 0 for documents which are absent from the result vector and for
 * documents where the calculated phrase frequency is zero.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    $freq = ($this->_slop == 0) ? $this->_exactPhraseFreq($docId)
                                : $this->_sloppyPhraseFreq($docId, $reader);

    if ($freq == 0) {
        // Included in result, but calculated freq is zero
        return 0;
    }

    $tf     = $reader->getSimilarity()->tf($freq);
    $weight = $this->_weight->getValue();
    $norm   = $reader->norm($docId, reset($this->_terms)->field);

    return $tf * $weight * $norm * $this->getBoost();
}
|
||||
|
||||
/**
 * Return query terms.
 *
 * @return array  Array of Zend_Search_Lucene_Index_Term objects
 */
public function getQueryTerms()
{
    return $this->_terms;
}
|
||||
|
||||
/**
 * Query specific matches highlighting.
 *
 * Passes the text of every phrase term to the highlighter.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    $termTexts = array();

    foreach ($this->_terms as $phraseTerm) {
        $termTexts[] = $phraseTerm->text;
    }

    $highlighter->highlight($termTexts);
}
|
||||
|
||||
/**
 * Print a query.
 *
 * Produces [field:]"term1 term2 ..." followed by '~slop' when the slop is
 * not zero and by '^boost' when the boost is not 1.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping

    $prefix = '';
    if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
        $prefix = $this->_terms[0]->field . ':';
    }

    $phrase = '';
    foreach ($this->_terms as $id => $term) {
        $separator = ($id != 0) ? ' ' : '';
        $phrase   .= $separator . $term->text;
    }

    $query = $prefix . '"' . $phrase . '"';

    if ($this->_slop != 0) {
        $query .= '~' . $this->_slop;
    }

    if ($this->getBoost() != 1) {
        $query .= '^' . round($this->getBoost(), 4);
    }

    return $query;
}
|
||||
}
|
||||
|
127
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing.php
vendored
Normal file
127
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing.php
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Preprocessing.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
 * It's an internal abstract class intended to finalize a query processing after query parsing.
 * This type of query is not actually involved into query execution.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @internal
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Search_Query_Preprocessing extends Zend_Search_Lucene_Search_Query
{
    /**
     * Matched terms.
     *
     * Matched terms list.
     * It's filled during rewrite operation and may be used for search result highlighting
     *
     * Array of Zend_Search_Lucene_Index_Term objects
     *
     * @var array
     */
    protected $_matches = null;

    /**
     * Optimize query in the context of specified index.
     *
     * Always throws: a preprocessing query is not meant to be executed.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'This query is not intended to be executed.';
        throw new Zend_Search_Lucene_Exception($message);
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * Always throws: a preprocessing query is not meant to be executed.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     * @throws Zend_Search_Lucene_Exception
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'This query is not intended to be executed.';
        throw new Zend_Search_Lucene_Exception($message);
    }

    /**
     * Execute query in context of index reader.
     *
     * Always throws: a preprocessing query is not meant to be executed.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @throws Zend_Search_Lucene_Exception
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'This query is not intended to be executed.';
        throw new Zend_Search_Lucene_Exception($message);
    }

    /**
     * Get document ids likely matching the query.
     *
     * Always throws: a preprocessing query is not meant to be executed.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception
     */
    public function matchedDocs()
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'This query is not intended to be executed.';
        throw new Zend_Search_Lucene_Exception($message);
    }

    /**
     * Score specified document.
     *
     * Always throws: a preprocessing query is not meant to be executed.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     * @throws Zend_Search_Lucene_Exception
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'This query is not intended to be executed.';
        throw new Zend_Search_Lucene_Exception($message);
    }

    /**
     * Return query terms.
     *
     * Always throws: query terms are available only after the rewrite operation.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception
     */
    public function getQueryTerms()
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        $message = 'Rewrite operation has to be done before retrieving query terms.';
        throw new Zend_Search_Lucene_Exception($message);
    }
}
|
||||
|
287
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php
vendored
Normal file
287
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php
vendored
Normal file
@ -0,0 +1,287 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Fuzzy.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
|
||||
/**
|
||||
* It's an internal class intended to finalize a fuzzy query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy extends Zend_Search_Lucene_Search_Query_Preprocessing
|
||||
{
|
||||
/**
|
||||
* word (query parser lexeme) to find.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_word;
|
||||
|
||||
/**
|
||||
* Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_encoding;
|
||||
|
||||
|
||||
/**
|
||||
* Field name.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_field;
|
||||
|
||||
/**
|
||||
* A value between 0 and 1 to set the required similarity
|
||||
* between the query term and the matching terms. For example, for a
|
||||
* _minimumSimilarity of 0.5 a term of the same length
|
||||
* as the query term is considered similar to the query term if the edit distance
|
||||
* between both terms is less than length(term)*0.5
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
private $_minimumSimilarity;
|
||||
|
||||
/**
 * Class constructor. Create a new preprocessing object for fuzzy query.
 *
 * The arguments are stored as is; no validation is performed here.
 *
 * @param string $word               Non-tokenized word (query parser lexeme) to search.
 * @param string $encoding           Word encoding.
 * @param string $fieldName          Field name.
 * @param float  $minimumSimilarity  Minimum similarity.
 */
public function __construct($word, $encoding, $fieldName, $minimumSimilarity)
{
    // What to search for
    $this->_word     = $word;
    $this->_encoding = $encoding;

    // Where and how strictly to search
    $this->_field             = $fieldName;
    $this->_minimumSimilarity = $minimumSimilarity;
}
|
||||
|
||||
/**
|
||||
* Re-write query into primitive queries in the context of specified index
|
||||
*
|
||||
* @param Zend_Search_Lucene_Interface $index
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
public function rewrite(Zend_Search_Lucene_Interface $index)
|
||||
{
|
||||
if ($this->_field === null) {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Boolean();
|
||||
|
||||
$hasInsignificantSubqueries = false;
|
||||
|
||||
require_once 'Zend/Search/Lucene.php';
|
||||
if (Zend_Search_Lucene::getDefaultSearchField() === null) {
|
||||
$searchFields = $index->getFieldNames(true);
|
||||
} else {
|
||||
$searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
|
||||
}
|
||||
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
|
||||
foreach ($searchFields as $fieldName) {
|
||||
$subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_word,
|
||||
$this->_encoding,
|
||||
$fieldName,
|
||||
$this->_minimumSimilarity);
|
||||
|
||||
$rewrittenSubquery = $subquery->rewrite($index);
|
||||
|
||||
if ( !($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant ||
|
||||
$rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Empty) ) {
|
||||
$query->addSubquery($rewrittenSubquery);
|
||||
}
|
||||
|
||||
if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
|
||||
$hasInsignificantSubqueries = true;
|
||||
}
|
||||
}
|
||||
|
||||
$subqueries = $query->getSubqueries();
|
||||
|
||||
if (count($subqueries) == 0) {
|
||||
$this->_matches = array();
|
||||
if ($hasInsignificantSubqueries) {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
return new Zend_Search_Lucene_Search_Query_Empty();
|
||||
}
|
||||
}
|
||||
|
||||
if (count($subqueries) == 1) {
|
||||
$query = reset($subqueries);
|
||||
}
|
||||
|
||||
$query->setBoost($this->getBoost());
|
||||
|
||||
$this->_matches = $query->getQueryTerms();
|
||||
return $query;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Recognize exact term matching (it corresponds to Keyword fields stored in the index)
|
||||
// encoding is not used since we expect binary matching
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
$term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
|
||||
if ($index->hasTerm($term)) {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
|
||||
$query->setBoost($this->getBoost());
|
||||
|
||||
// Get rewritten query. Important! It also fills terms matching container.
|
||||
$rewrittenQuery = $query->rewrite($index);
|
||||
$this->_matches = $query->getQueryTerms();
|
||||
|
||||
return $rewrittenQuery;
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------
|
||||
// Recognize wildcard queries
|
||||
|
||||
/** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
|
||||
if (@preg_match('/\pL/u', 'a') == 1) {
|
||||
$subPatterns = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
|
||||
} else {
|
||||
$subPatterns = preg_split('/[*?]/', $this->_word);
|
||||
}
|
||||
if (count($subPatterns) > 1) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search doesn\'t support wildcards (except within Keyword fields).');
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------
|
||||
// Recognize one-term multi-term and "insignificant" queries
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
|
||||
|
||||
if (count($tokens) == 0) {
|
||||
$this->_matches = array();
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
}
|
||||
|
||||
if (count($tokens) == 1) {
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
|
||||
$query->setBoost($this->getBoost());
|
||||
|
||||
// Get rewritten query. Important! It also fills terms matching container.
|
||||
$rewrittenQuery = $query->rewrite($index);
|
||||
$this->_matches = $query->getQueryTerms();
|
||||
|
||||
return $rewrittenQuery;
|
||||
}
|
||||
|
||||
// Word is tokenized into several tokens
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
|
||||
}
|
||||
|
||||
/**
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
/** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */
|
||||
|
||||
/** Skip exact term matching recognition, keyword fields highlighting is not supported */
|
||||
|
||||
// -------------------------------------
|
||||
// Recognize wildcard queries
|
||||
|
||||
/** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
|
||||
if (@preg_match('/\pL/u', 'a') == 1) {
|
||||
$subPatterns = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
|
||||
} else {
|
||||
$subPatterns = preg_split('/[*?]/', $this->_word);
|
||||
}
|
||||
if (count($subPatterns) > 1) {
|
||||
// Do nothing
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------
|
||||
// Recognize one-term multi-term and "insignificant" queries
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
|
||||
if (count($tokens) == 0) {
|
||||
// Do nothing
|
||||
return;
|
||||
}
|
||||
if (count($tokens) == 1) {
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
|
||||
|
||||
$query->_highlightMatches($highlighter);
|
||||
return;
|
||||
}
|
||||
|
||||
// Word is tokenized into several tokens
|
||||
// But fuzzy search is supported only for non-multiple word terms
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a query
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function __toString()
|
||||
{
|
||||
// It's used only for query visualisation, so we don't care about characters escaping
|
||||
if ($this->_field !== null) {
|
||||
$query = $this->_field . ':';
|
||||
} else {
|
||||
$query = '';
|
||||
}
|
||||
|
||||
$query .= $this->_word;
|
||||
|
||||
if ($this->getBoost() != 1) {
|
||||
$query .= '^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
}
|
270
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php
vendored
Normal file
270
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php
vendored
Normal file
@ -0,0 +1,270 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Phrase.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
/**
|
||||
* It's an internal class intended to finalize query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Preprocessing_Phrase extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * Phrase to find.
     *
     * @var string
     */
    private $_phrase;

    /**
     * Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
     *
     * @var string
     */
    private $_phraseEncoding;

    /**
     * Field name. When null, rewrite() splits the phrase into one subquery per
     * search field.
     *
     * @var string|null
     */
    private $_field;

    /**
     * Sets the number of other words permitted between words in query phrase.
     * If zero, then this is an exact phrase search.  For larger values this works
     * like a WITHIN or NEAR operator.
     *
     * The slop is in fact an edit-distance, where the units correspond to
     * moves of terms in the query phrase out of position.  For example, to switch
     * the order of two words requires two moves (the first move places the words
     * atop one another), so to permit re-orderings of phrases, the slop must be
     * at least two.
     * More exact matches are scored higher than sloppier matches, thus search
     * results are sorted by exactness.
     *
     * The slop is zero by default, requiring exact matches.
     *
     * @var integer
     */
    private $_slop = 0; // explicit default so getSlop() matches the documented "zero by default"

    /**
     * Class constructor.  Create a new preprocessing object for phrase query.
     *
     * @param string $phrase          Phrase to search.
     * @param string $phraseEncoding  Phrase encoding.
     * @param string $fieldName       Field name.
     */
    public function __construct($phrase, $phraseEncoding, $fieldName)
    {
        $this->_phrase         = $phrase;
        $this->_phraseEncoding = $phraseEncoding;
        $this->_field          = $fieldName;
    }

    /**
     * Set slop
     *
     * @param integer $slop
     */
    public function setSlop($slop)
    {
        $this->_slop = $slop;
    }

    /**
     * Get slop
     *
     * @return integer
     */
    public function getSlop()
    {
        return $this->_slop;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Depending on the phrase this returns a boolean query (no field given),
     * a term query (exact keyword match or single token), an "insignificant"
     * query (no tokens) or a phrase query (several tokens).
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        // Wildcards are deliberately allowed within phrases: they are either removed
        // by the text analyzer or used as a part of keyword for keyword fields.

        // Split query into subqueries if field name is not specified
        if ($this->_field === null) {
            require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->setBoost($this->getBoost());

            require_once 'Zend/Search/Lucene.php';
            if (Zend_Search_Lucene::getDefaultSearchField() === null) {
                $searchFields = $index->getFieldNames(true);
            } else {
                $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
            }

            foreach ($searchFields as $fieldName) {
                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                     $this->_phraseEncoding,
                                                                                     $fieldName);
                $subquery->setSlop($this->getSlop());

                $query->addSubquery($subquery->rewrite($index));
            }

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        require_once 'Zend/Search/Lucene/Index/Term.php';
        $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
        if ($index->hasTerm($term)) {
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // tokenize phrase using current analyzer and process it as a phrase query
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

        if (count($tokens) == 0) {
            $this->_matches = array();
            require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($tokens) == 1) {
            require_once 'Zend/Search/Lucene/Index/Term.php';
            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // It's non-trivial phrase query
        $position = -1;
        require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
        $query = new Zend_Search_Lucene_Search_Query_Phrase();
        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($tokens as $token) {
            // Position increments are accumulated so gaps reported by the analyzer are kept
            $position += $token->getPositionIncrement();
            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $query->addTerm($term, $position);
        }

        // The slop does not depend on the token, so it is applied once after all terms are added
        $query->setSlop($this->getSlop());

        // Carry the boost over like every other branch of this method does;
        // otherwise a boosted multi-word phrase on an explicit field loses its boost
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */

        /** Skip exact term matching recognition, keyword fields highlighting is not supported */

        /** Skip wildcard queries recognition. Supported wildcards are removed by text analyzer */

        // tokenize phrase using current analyzer and process it as a phrase query
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

        if (count($tokens) == 0) {
            // Do nothing
            return;
        }

        if (count($tokens) == 1) {
            $highlighter->highlight($tokens[0]->getTermText());
            return;
        }

        // It's non-trivial phrase query: highlight every word of the phrase
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        if ($this->_field !== null) {
            $query = $this->_field . ':';
        } else {
            $query = '';
        }

        $query .= '"' . $this->_phrase . '"';

        if ($this->_slop != 0) {
            $query .= '~' . $this->_slop;
        }

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}
|
341
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Term.php
vendored
Normal file
341
thirdparty/Zend/Search/Lucene/Search/Query/Preprocessing/Term.php
vendored
Normal file
@ -0,0 +1,341 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
|
||||
/**
|
||||
* It's an internal class intended to finalize query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Preprocessing_Term extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * Raw, non-tokenized word (query parser lexeme) to search for.
     *
     * @var string
     */
    private $_word;

    /**
     * Encoding of the word. The field name is always UTF-8 since it may be retrieved from the index.
     *
     * @var string
     */
    private $_encoding;

    /**
     * Field name. When null, rewrite() fans the word out over the default search
     * field or, if none is configured, over $index->getFieldNames(true).
     *
     * @var string|null
     */
    private $_field;

    /**
     * Class constructor. Creates a new preprocessing object for a term query.
     *
     * @param string $word      Non-tokenized word (query parser lexeme) to search.
     * @param string $encoding  Word encoding.
     * @param string $fieldName Field name.
     */
    public function __construct($word, $encoding, $fieldName)
    {
        $this->_word     = $word;
        $this->_encoding = $encoding;
        $this->_field    = $fieldName;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException if a fragment between
     *         wildcards is tokenized into more than one token
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if ($this->_field === null) {
            // No field given: rewrite the word once per candidate field and pool the resulting terms.
            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $multiTerm = new Zend_Search_Lucene_Search_Query_MultiTerm();
            $multiTerm->setBoost($this->getBoost());

            $sawInsignificant = false;

            require_once 'Zend/Search/Lucene.php';
            $fieldNames = (Zend_Search_Lucene::getDefaultSearchField() === null)
                ? $index->getFieldNames(true)
                : array(Zend_Search_Lucene::getDefaultSearchField());

            require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Term.php';
            foreach ($fieldNames as $name) {
                $perField  = new self($this->_word, $this->_encoding, $name);
                $rewritten = $perField->rewrite($index);

                foreach ($rewritten->getQueryTerms() as $matchedTerm) {
                    $multiTerm->addTerm($matchedTerm);
                }

                if ($rewritten instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                    $sawInsignificant = true;
                }
            }

            if (count($multiTerm->getTerms()) == 0) {
                $this->_matches = array();

                if ($sawInsignificant) {
                    require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
                    return new Zend_Search_Lucene_Search_Query_Insignificant();
                }

                require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
                return new Zend_Search_Lucene_Search_Query_Empty();
            }

            $this->_matches = $multiTerm->getQueryTerms();
            return $multiTerm;
        }

        // -------------------------------------
        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        require_once 'Zend/Search/Lucene/Index/Term.php';
        $keywordTerm = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
        if ($index->hasTerm($keywordTerm)) {
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            $termQuery = new Zend_Search_Lucene_Search_Query_Term($keywordTerm);
            $termQuery->setBoost($this->getBoost());

            $this->_matches = $termQuery->getQueryTerms();
            return $termQuery;
        }

        // -------------------------------------
        // Recognize wildcard queries

        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
        if (@preg_match('/\pL/u', 'a') == 1) {
            $text          = iconv($this->_encoding, 'UTF-8', $this->_word);
            $splitRegex    = '/[*?]/u';
            $pieceEncoding = 'UTF-8';
        } else {
            $text          = $this->_word;
            $splitRegex    = '/[*?]/';
            $pieceEncoding = $this->_encoding;
        }

        $pieces = preg_split($splitRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);

        if (count($pieces) > 1) {
            // Wildcard query is recognized
            $pattern = '';

            require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
            foreach ($pieces as $pieceIndex => $piece) {
                list($fragment, $offset) = $piece;

                // Re-insert the wildcard character that precedes every fragment except the first
                if ($pieceIndex != 0) {
                    $pattern .= $text[$offset - 1];
                }

                // Each fragment must be a single word in terms of the current analyzer
                $fragmentTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($fragment, $pieceEncoding);
                if (count($fragmentTokens) > 1) {
                    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
                    throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
                }
                foreach ($fragmentTokens as $fragmentToken) {
                    $pattern .= $fragmentToken->getTermText();
                }
            }

            require_once 'Zend/Search/Lucene/Index/Term.php';
            $patternTerm = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $wildcardQuery = new Zend_Search_Lucene_Search_Query_Wildcard($patternTerm);
            $wildcardQuery->setBoost($this->getBoost());

            // Get rewritten query. Important! It also fills terms matching container.
            $rewritten      = $wildcardQuery->rewrite($index);
            $this->_matches = $wildcardQuery->getQueryTerms();

            return $rewritten;
        }

        // -------------------------------------
        // Recognize one-term multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
        $tokenCount = count($tokens);

        if ($tokenCount == 0) {
            $this->_matches = array();
            require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if ($tokenCount == 1) {
            require_once 'Zend/Search/Lucene/Index/Term.php';
            $singleTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            $termQuery = new Zend_Search_Lucene_Search_Query_Term($singleTerm);
            $termQuery->setBoost($this->getBoost());

            $this->_matches = $termQuery->getQueryTerms();
            return $termQuery;
        }

        // It's not insignificant or one term query
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $multiTerm = new Zend_Search_Lucene_Search_Query_MultiTerm();

        /**
         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
         * analizer design features
         */
        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($tokens as $token) {
            // all subterms are required
            $multiTerm->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field), true);
        }

        $multiTerm->setBoost($this->getBoost());

        $this->_matches = $multiTerm->getQueryTerms();
        return $multiTerm;
    }

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */

        /** Skip exact term matching recognition, keyword fields highlighting is not supported */

        // -------------------------------------
        // Recognize wildcard queries
        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
        if (@preg_match('/\pL/u', 'a') == 1) {
            $text          = iconv($this->_encoding, 'UTF-8', $this->_word);
            $splitRegex    = '/[*?]/u';
            $pieceEncoding = 'UTF-8';
        } else {
            $text          = $this->_word;
            $splitRegex    = '/[*?]/';
            $pieceEncoding = $this->_encoding;
        }

        $pieces = preg_split($splitRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);

        if (count($pieces) > 1) {
            // Wildcard query is recognized
            $pattern = '';

            require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
            foreach ($pieces as $pieceIndex => $piece) {
                list($fragment, $offset) = $piece;

                // Re-insert the wildcard character that precedes every fragment except the first
                if ($pieceIndex != 0) {
                    $pattern .= $text[$offset - 1];
                }

                // Each fragment must be a single word in terms of the current analyzer
                $fragmentTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($fragment, $pieceEncoding);
                if (count($fragmentTokens) > 1) {
                    // Do nothing (nothing is highlighted)
                    return;
                }
                foreach ($fragmentTokens as $fragmentToken) {
                    $pattern .= $fragmentToken->getTermText();
                }
            }

            require_once 'Zend/Search/Lucene/Index/Term.php';
            $patternTerm = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $wildcardQuery = new Zend_Search_Lucene_Search_Query_Wildcard($patternTerm);

            $wildcardQuery->_highlightMatches($highlighter);
            return;
        }

        // -------------------------------------
        // Recognize one-term multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

        switch (count($tokens)) {
            case 0:
                // Do nothing
                return;

            case 1:
                $highlighter->highlight($tokens[0]->getTermText());
                return;
        }

        // It's not insignificant or one term query: highlight every word
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        $query  = ($this->_field !== null) ? $this->_field . ':' : '';
        $query .= $this->_word;

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}
|
377
thirdparty/Zend/Search/Lucene/Search/Query/Range.php
vendored
Normal file
377
thirdparty/Zend/Search/Lucene/Search/Query/Range.php
vendored
Normal file
@ -0,0 +1,377 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Range.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Query
{
    /**
     * Lower term (null when the range has no lower bound).
     *
     * @var Zend_Search_Lucene_Index_Term|null
     */
    private $_lowerTerm;

    /**
     * Upper term (null when the range has no upper bound).
     *
     * @var Zend_Search_Lucene_Index_Term|null
     */
    private $_upperTerm;


    /**
     * Search field, taken from whichever boundary term is present.
     * Null means "search through all indexed fields" (see rewrite()).
     *
     * @var string|null
     */
    private $_field;

    /**
     * Inclusive flag: true if the boundary terms themselves belong to the range.
     *
     * @var boolean
     */
    private $_inclusive;

    /**
     * Matched terms.
     *
     * Matched terms list.
     * It's filled during the search (rewrite operation) and may be used for search result
     * post-processing
     *
     * Array of Zend_Search_Lucene_Index_Term objects; null until rewrite() has been called.
     *
     * @var array|null
     */
    private $_matches = null;


    /**
     * Zend_Search_Lucene_Search_Query_Range constructor.
     *
     * At least one of the boundary terms has to be given, and when both are
     * given they have to refer to the same field.
     *
     * @param Zend_Search_Lucene_Index_Term|null $lowerTerm
     * @param Zend_Search_Lucene_Index_Term|null $upperTerm
     * @param boolean $inclusive
     * @throws Zend_Search_Lucene_Exception
     */
    public function __construct($lowerTerm, $upperTerm, $inclusive)
    {
        if ($lowerTerm === null  &&  $upperTerm === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('At least one term must be non-null');
        }
        if ($lowerTerm !== null  &&  $upperTerm !== null  &&  $lowerTerm->field != $upperTerm->field) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Both terms must be for the same field');
        }

        $this->_field     = ($lowerTerm !== null)? $lowerTerm->field : $upperTerm->field;
        $this->_lowerTerm = $lowerTerm;
        $this->_upperTerm = $upperTerm;
        $this->_inclusive = $inclusive;
    }

    /**
     * Get query field name
     *
     * @return string|null
     */
    public function getField()
    {
        return $this->_field;
    }

    /**
     * Get lower term
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function getLowerTerm()
    {
        return $this->_lowerTerm;
    }

    /**
     * Get upper term
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function getUpperTerm()
    {
        return $this->_upperTerm;
    }

    /**
     * Get inclusive flag
     *
     * @return boolean
     */
    public function isInclusive()
    {
        return $this->_inclusive;
    }

    /**
     * Record a matched term and enforce the terms-per-query limit.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $maxTerms  limit on collected terms; 0 disables the check
     * @throws Zend_Search_Lucene_Exception  when more than $maxTerms terms have been collected
     */
    private function _collectMatchedTerm($term, $maxTerms)
    {
        $this->_matches[] = $term;

        if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
        }
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Walks the index terms stream of every searched field from the lower
     * boundary up to the upper boundary and collects the terms found there.
     * Returns an Empty query for no matches, a Term query for exactly one
     * match and a MultiTerm query otherwise.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception  when the terms-per-query limit is exceeded
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        $this->_matches = array();

        if ($this->_field === null) {
            // Search through all fields
            $fields = $index->getFieldNames(true /* indexed fields list */);
        } else {
            $fields = array($this->_field);
        }

        require_once 'Zend/Search/Lucene.php';
        $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();

        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($fields as $field) {
            $index->resetTermsStream();

            if ($this->_lowerTerm !== null) {
                $lowerTerm = new Zend_Search_Lucene_Index_Term($this->_lowerTerm->text, $field);

                $index->skipTo($lowerTerm);

                if (!$this->_inclusive  &&
                    $index->currentTerm() == $lowerTerm) {
                    // Skip lower term
                    $index->nextTerm();
                }
            } else {
                $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
            }


            if ($this->_upperTerm !== null) {
                // Walk up to the upper term
                $upperTerm = new Zend_Search_Lucene_Index_Term($this->_upperTerm->text, $field);

                // strcmp() instead of '<': PHP compares two numeric strings by
                // numeric value ('10' < '9' is false), which would end the walk
                // too early and silently drop terms.
                // NOTE(review): assumes the terms stream is ordered byte-wise - confirm
                // against the index implementation.
                while ($index->currentTerm() !== null          &&
                       $index->currentTerm()->field == $field  &&
                       strcmp($index->currentTerm()->text, $upperTerm->text) < 0) {
                    $this->_collectMatchedTerm($index->currentTerm(), $maxTerms);

                    $index->nextTerm();
                }

                if ($this->_inclusive  &&  $index->currentTerm() == $upperTerm) {
                    // Include upper term into result (subject to the same limit as any other match)
                    $this->_collectMatchedTerm($upperTerm, $maxTerms);
                }
            } else {
                // Walk up to the end of field data
                while ($index->currentTerm() !== null  &&  $index->currentTerm()->field == $field) {
                    $this->_collectMatchedTerm($index->currentTerm(), $maxTerms);

                    $index->nextTerm();
                }
            }

            $index->closeTermsStream();
        }

        if (count($this->_matches) == 0) {
            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
            return new Zend_Search_Lucene_Search_Query_Empty();
        } else if (count($this->_matches) == 1) {
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
        } else {
            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $rewrittenQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();

            foreach ($this->_matches as $matchedTerm) {
                $rewrittenQuery->addTerm($matchedTerm);
            }

            return $rewrittenQuery;
        }
    }

    /**
     * Optimize query in the context of specified index
     *
     * Not supported: a range query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Return query terms
     *
     * @return array  terms collected by the last rewrite() call
     * @throws Zend_Search_Lucene_Exception  when rewrite() has not been called yet
     */
    public function getQueryTerms()
    {
        if ($this->_matches === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
        }

        return $this->_matches;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * Not supported: a range query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
    }


    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Not supported: a range query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * Not supported: a range query has to be rewritten first.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function matchedDocs()
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Score specified document
     *
     * Not supported: a range query has to be rewritten first.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Query specific matches highlighting
     *
     * Tokenizes the 'body' field of the highlighted document with the default
     * analyzer and highlights every token whose text lies inside the range.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        $words = array();

        $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');

        $lowerTermText = ($this->_lowerTerm !== null)? $this->_lowerTerm->text : null;
        $upperTermText = ($this->_upperTerm !== null)? $this->_upperTerm->text : null;

        foreach ($tokens as $token) {
            $termText = $token->getTermText();

            // Strict null checks: an empty-string boundary is a real boundary,
            // only a missing term means "unbounded".
            // strcmp() keeps the comparison byte-wise even for numeric strings,
            // consistent with the term walk in rewrite().
            if ($lowerTermText !== null) {
                $order = strcmp($lowerTermText, $termText);
                if ($order > 0  ||  ($order == 0  &&  !$this->_inclusive)) {
                    // Token is below the range (or sits on an excluded boundary)
                    continue;
                }
            }

            if ($upperTermText !== null) {
                $order = strcmp($termText, $upperTermText);
                if ($order > 0  ||  ($order == 0  &&  !$this->_inclusive)) {
                    // Token is above the range (or sits on an excluded boundary)
                    continue;
                }
            }

            $words[] = $termText;
        }

        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * Inclusive ranges are printed with square brackets, exclusive ranges with
     * curly braces; a missing boundary is printed as 'null'.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        return (($this->_field === null)? '' : $this->_field . ':')
             . (($this->_inclusive)? '[' : '{')
             . (($this->_lowerTerm !== null)?  $this->_lowerTerm->text : 'null')
             . ' TO '
             . (($this->_upperTerm !== null)?  $this->_upperTerm->text : 'null')
             . (($this->_inclusive)? ']' : '}')
             . (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
    }
}
|
||||
|
228
thirdparty/Zend/Search/Lucene/Search/Query/Term.php
vendored
Normal file
228
thirdparty/Zend/Search/Lucene/Search/Query/Term.php
vendored
Normal file
@ -0,0 +1,228 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
{
    /**
     * The term this query looks for.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * Matched documents, keyed by document id.
     * Null until execute() has been called.
     *
     * @var array|null
     */
    private $_docVector = null;

    /**
     * Term frequencies as returned by the reader.
     * array(docId => freq, ...)
     *
     * @var array
     */
    private $_termFreqs;


    /**
     * Create a query for a single term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     */
    public function __construct(Zend_Search_Lucene_Index_Term $term)
    {
        $this->_term = $term;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * A term that already names a field is primitive and is returned as is.
     * Otherwise the term text is looked up in every indexed field through a
     * MultiTerm query that inherits this query's boost.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if ($this->_term->field != null) {
            return $this;
        }

        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $allFieldsQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
        $allFieldsQuery->setBoost($this->getBoost());

        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($index->getFieldNames(true) as $indexedField) {
            $allFieldsQuery->addTerm(
                new Zend_Search_Lucene_Index_Term($this->_term->text, $indexedField)
            );
        }

        return $allFieldsQuery->rewrite($index);
    }

    /**
     * Optimize query in the context of specified index
     *
     * Collapses to an Empty query when the index does not contain the term.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        if ($index->hasTerm($this->_term)) {
            return $this;
        }

        // Nothing to find: the term is absent from the index
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * The created weight is also stored in $this->_weight.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Search/Weight/Term.php';

        return $this->_weight = new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        // Flip the id list so that document ids become array keys
        $matchedIds       = $reader->termDocs($this->_term, $docsFilter);
        $this->_docVector = array_flip($matchedIds);
        $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_docVector;
    }

    /**
     * Score specified document
     *
     * Documents that are not in the matched set score 0; for the others the
     * score is tf * weight * field norm * boost.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (!isset($this->_docVector[$docId])) {
            return 0;
        }

        $termFrequency = $reader->getSimilarity()->tf($this->_termFreqs[$docId]);
        $weightValue   = $this->_weight->getValue();
        $fieldNorm     = $reader->norm($docId, $this->_term->field);

        return $termFrequency * $weightValue * $fieldNorm * $this->getBoost();
    }

    /**
     * Return query terms
     *
     * @return array  single-element array holding the query term
     */
    public function getQueryTerms()
    {
        return array($this->_term);
    }

    /**
     * Return query term
     *
     * @return Zend_Search_Lucene_Index_Term
     */
    public function getTerm()
    {
        return $this->_term;
    }

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        $highlighter->highlight($this->_term->text);
    }

    /**
     * Print a query
     *
     * Format: [field:]text[^boost]
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        $fieldPrefix = ($this->_term->field !== null) ? $this->_term->field . ':' : '';
        $printed     = $fieldPrefix . $this->_term->text;

        if ($this->getBoost() != 1) {
            $printed .= '^' . round($this->getBoost(), 4);
        }

        return $printed;
    }
}
|
||||
|
362
thirdparty/Zend/Search/Lucene/Search/Query/Wildcard.php
vendored
Normal file
362
thirdparty/Zend/Search/Lucene/Search/Query/Wildcard.php
vendored
Normal file
@ -0,0 +1,362 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Wildcard.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search_Query
{
    /**
     * Search pattern.
     *
     * Field has to be fully specified or has to be null
     * Text may contain '*' or '?' symbols
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_pattern;

    /**
     * Matched terms.
     *
     * Matched terms list.
     * It's filled during the search (rewrite operation) and may be used for search result
     * post-processing
     *
     * Array of Zend_Search_Lucene_Index_Term objects; null until rewrite() has been called.
     *
     * @var array|null
     */
    private $_matches = null;

    /**
     * Minimum term prefix length (number of minimum non-wildcard characters)
     *
     * @var integer
     */
    private static $_minPrefixLength = 3;

    /**
     * Zend_Search_Lucene_Search_Query_Wildcard constructor.
     *
     * @param Zend_Search_Lucene_Index_Term $pattern
     */
    public function __construct(Zend_Search_Lucene_Index_Term $pattern)
    {
        $this->_pattern = $pattern;
    }

    /**
     * Get minimum prefix length
     *
     * @return integer
     */
    public static function getMinPrefixLength()
    {
        return self::$_minPrefixLength;
    }

    /**
     * Set minimum prefix length
     *
     * @param integer $minPrefixLength
     */
    public static function setMinPrefixLength($minPrefixLength)
    {
        self::$_minPrefixLength = $minPrefixLength;
    }

    /**
     * Get terms prefix
     *
     * Returns the part of $word that precedes the first '?' or '*' symbol,
     * or the whole word if it contains no wildcard symbols.
     *
     * @param string $word
     * @return string
     */
    private static function _getPrefix($word)
    {
        $questionMarkPosition = strpos($word, '?');
        $asteriskPosition     = strpos($word, '*');

        if ($questionMarkPosition !== false) {
            if ($asteriskPosition !== false) {
                return substr($word, 0, min($questionMarkPosition, $asteriskPosition));
            }

            return substr($word, 0, $questionMarkPosition);
        } else if ($asteriskPosition !== false) {
            return substr($word, 0, $asteriskPosition);
        }

        return $word;
    }

    /**
     * Translate the wildcard pattern text into an anchored PCRE expression.
     *
     * '?' becomes '.', '*' becomes '.*', everything else is matched literally.
     * The 'u' modifier is appended when PCRE unicode support is available.
     *
     * @return string
     */
    private function _buildMatchExpression()
    {
        $matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*') , preg_quote($this->_pattern->text, '/')) . '$/';

        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
        if (@preg_match('/\pL/u', 'a') == 1) {
            // PCRE unicode support is turned on
            // add Unicode modifier to the match expression
            $matchExpression .= 'u';
        }

        return $matchExpression;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Walks the index terms that start with the non-wildcard prefix of the
     * pattern and collects those matching the whole pattern. Returns an Empty
     * query for no matches, a Term query for exactly one match and a MultiTerm
     * query otherwise.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception  when the pattern prefix is too short or
     *                                       the terms-per-query limit is exceeded
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        $this->_matches = array();

        if ($this->_pattern->field === null) {
            // Search through all fields
            $fields = $index->getFieldNames(true /* indexed fields list */);
        } else {
            $fields = array($this->_pattern->field);
        }

        $prefix       = self::_getPrefix($this->_pattern->text);
        $prefixLength = strlen($prefix);

        // Reject too short prefixes before doing any pattern or index work
        if ($prefixLength < self::$_minPrefixLength) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('At least ' . self::$_minPrefixLength . ' non-wildcard characters are required at the beginning of pattern.');
        }

        $matchExpression = $this->_buildMatchExpression();

        // Make sure the class is loaded before the static call below
        require_once 'Zend/Search/Lucene.php';
        $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();

        require_once 'Zend/Search/Lucene/Index/Term.php';
        foreach ($fields as $field) {
            $index->resetTermsStream();

            // An empty prefix positions the stream at the first term of the field
            $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));

            // strncmp() with length 0 always yields 0, so an empty prefix walks
            // the whole field; otherwise the walk ends at the first term that
            // does not start with the prefix (byte-wise, never numeric).
            while ($index->currentTerm() !== null          &&
                   $index->currentTerm()->field == $field  &&
                   strncmp($index->currentTerm()->text, $prefix, $prefixLength) === 0) {
                if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
                    $this->_matches[] = $index->currentTerm();

                    if ($maxTerms != 0  &&  count($this->_matches) > $maxTerms) {
                        require_once 'Zend/Search/Lucene/Exception.php';
                        throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                    }
                }

                $index->nextTerm();
            }

            $index->closeTermsStream();
        }

        if (count($this->_matches) == 0) {
            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
            return new Zend_Search_Lucene_Search_Query_Empty();
        } else if (count($this->_matches) == 1) {
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
            return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
        } else {
            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $rewrittenQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();

            foreach ($this->_matches as $matchedTerm) {
                $rewrittenQuery->addTerm($matchedTerm);
            }

            return $rewrittenQuery;
        }
    }

    /**
     * Optimize query in the context of specified index
     *
     * Not supported: a wildcard query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
    }


    /**
     * Returns query pattern
     *
     * @return Zend_Search_Lucene_Index_Term
     */
    public function getPattern()
    {
        return $this->_pattern;
    }


    /**
     * Return query terms
     *
     * @return array  terms collected by the last rewrite() call
     * @throws Zend_Search_Lucene_Exception  when rewrite() has not been called yet
     */
    public function getQueryTerms()
    {
        if ($this->_matches === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
        }

        return $this->_matches;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * Not supported: a wildcard query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
    }


    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Not supported: a wildcard query has to be rewritten first.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * Not supported: a wildcard query has to be rewritten first.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function matchedDocs()
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Score specified document
     *
     * Not supported: a wildcard query has to be rewritten first.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     * @throws Zend_Search_Lucene_Exception  always
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
    }

    /**
     * Query specific matches highlighting
     *
     * Tokenizes the 'body' field of the highlighted document with the default
     * analyzer and highlights every token matching the wildcard pattern.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        $words = array();

        $matchExpression = $this->_buildMatchExpression();

        $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
        foreach ($tokens as $token) {
            if (preg_match($matchExpression, $token->getTermText()) === 1) {
                $words[] = $token->getTermText();
            }
        }

        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * Format: [field:]pattern[^boost]
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        if ($this->_pattern->field !== null) {
            $query = $this->_pattern->field . ':';
        } else {
            $query = '';
        }

        $query .= $this->_pattern->text;

        if ($this->getBoost() != 1) {
            $query = $query . '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}
|
||||
|
67
thirdparty/Zend/Search/Lucene/Search/QueryEntry.php
vendored
Normal file
67
thirdparty/Zend/Search/Lucene/Search/QueryEntry.php
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryEntry.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Boost factor accumulated for this query entry (starts at 1.0).
     *
     * @var float
     */
    protected $_boost = 1.0;


    /**
     * Boost query entry
     *
     * The factor is multiplied into the current boost, so repeated calls
     * accumulate.
     *
     * @param float $boostFactor
     */
    public function boost($boostFactor)
    {
        $this->_boost = $this->_boost * $boostFactor;
    }


    /**
     * Process modifier ('~')
     *
     * @param mixed $parameter
     */
    abstract public function processFuzzyProximityModifier($parameter = null);


    /**
     * Transform entry to a subquery
     *
     * @param string $encoding
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function getQuery($encoding);
}
|
116
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Phrase.php
vendored
Normal file
116
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Phrase.php
vendored
Normal file
@ -0,0 +1,116 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Phrase.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Phrase text as given to the constructor.
     *
     * @var string
     */
    private $_phrase;

    /**
     * Field name as given to the constructor (null when no field was given).
     *
     * @var string|null
     */
    private $_field;

    /**
     * True once the '~' modifier has been applied, i.e. the phrase is a
     * proximity query.
     *
     * @var boolean
     */
    private $_proximityQuery = false;

    /**
     * Word distance used for proximity queries; passed to setSlop().
     *
     * @var integer
     */
    private $_wordsDistance = 0;

    /**
     * Object constructor.
     *
     * @param string $phrase
     * @param string $field
     */
    public function __construct($phrase, $field)
    {
        $this->_field  = $field;
        $this->_phrase = $phrase;
    }

    /**
     * Process modifier ('~'): mark the phrase as a proximity query and,
     * when a parameter is supplied, use it as the word distance.
     *
     * @param mixed $parameter
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $this->_proximityQuery = true;

        if ($parameter === null) {
            // No explicit distance: keep the current value.
            return;
        }

        $this->_wordsDistance = $parameter;
    }

    /**
     * Transform the entry into a phrase preprocessing query.
     *
     * @param string $encoding  encoding of the phrase and field name
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function getQuery($encoding)
    {
        /** Zend_Search_Lucene_Search_Query_Preprocessing_Phrase */
        require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php';

        // The field name is handed over as UTF-8; the phrase keeps its encoding.
        if ($this->_field !== null) {
            $fieldName = iconv($encoding, 'UTF-8', $this->_field);
        } else {
            $fieldName = null;
        }

        $phraseQuery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase(
            $this->_phrase,
            $encoding,
            $fieldName
        );

        if ($this->_proximityQuery) {
            $phraseQuery->setSlop($this->_wordsDistance);
        }

        $phraseQuery->setBoost($this->_boost);

        return $phraseQuery;
    }
}
|
77
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Subquery.php
vendored
Normal file
77
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Subquery.php
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Subquery.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Wrapped query object.
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Search_Query $query
     */
    public function __construct(Zend_Search_Lucene_Search_Query $query)
    {
        $this->_query = $query;
    }

    /**
     * Process modifier ('~').
     *
     * The modifier is not applicable to a subquery, so this always throws.
     *
     * @param mixed $parameter
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

        $message = '\'~\' sign must follow term or phrase';
        throw new Zend_Search_Lucene_Search_QueryParserException($message);
    }

    /**
     * Return the wrapped query with this entry's boost applied to it.
     *
     * @param string $encoding  not used by this entry type
     * @return Zend_Search_Lucene_Search_Query
     */
    public function getQuery($encoding)
    {
        $subquery = $this->_query;
        $subquery->setBoost($this->_boost);

        return $subquery;
    }
}
|
130
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Term.php
vendored
Normal file
130
thirdparty/Zend/Search/Lucene/Search/QueryEntry/Term.php
vendored
Normal file
@ -0,0 +1,130 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Term text as given to the constructor.
     *
     * @var string
     */
    private $_term;

    /**
     * Field name as given to the constructor (null when no field was given).
     *
     * @var string|null
     */
    private $_field;

    /**
     * True once the '~' modifier has been applied, i.e. the term is a
     * fuzzy search query.
     *
     * @var boolean
     */
    private $_fuzzyQuery = false;

    /**
     * Similarity passed to the fuzzy preprocessing query.
     *
     * @var float
     */
    private $_similarity = 1.;

    /**
     * Object constructor.
     *
     * @param string $term
     * @param string $field
     */
    public function __construct($term, $field)
    {
        $this->_field = $field;
        $this->_term  = $term;
    }

    /**
     * Process modifier ('~'): mark the term as fuzzy and set the similarity
     * from the parameter, or from the fuzzy query default when none is given.
     *
     * @param mixed $parameter
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $this->_fuzzyQuery = true;

        if ($parameter !== null) {
            $this->_similarity = $parameter;
            return;
        }

        /** Zend_Search_Lucene_Search_Query_Fuzzy */
        require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
        $this->_similarity = Zend_Search_Lucene_Search_Query_Fuzzy::DEFAULT_MIN_SIMILARITY;
    }

    /**
     * Transform the entry into a term (or fuzzy term) preprocessing query.
     *
     * @param string $encoding  encoding of the term and field name
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function getQuery($encoding)
    {
        if ($this->_fuzzyQuery) {
            /** Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy */
            require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
            $termQuery = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy(
                $this->_term,
                $encoding,
                $this->_fieldAsUtf8($encoding),
                $this->_similarity
            );
        } else {
            /** Zend_Search_Lucene_Search_Query_Preprocessing_Term */
            require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Term.php';
            $termQuery = new Zend_Search_Lucene_Search_Query_Preprocessing_Term(
                $this->_term,
                $encoding,
                $this->_fieldAsUtf8($encoding)
            );
        }

        $termQuery->setBoost($this->_boost);

        return $termQuery;
    }

    /**
     * Convert the field name from the given encoding to UTF-8.
     *
     * @param string $encoding
     * @return string|null  null when the entry has no field
     */
    private function _fieldAsUtf8($encoding)
    {
        if ($this->_field === null) {
            return null;
        }

        return iconv($encoding, 'UTF-8', $this->_field);
    }
}
|
110
thirdparty/Zend/Search/Lucene/Search/QueryHit.php
vendored
Normal file
110
thirdparty/Zend/Search/Lucene/Search/QueryHit.php
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryHit.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryHit
{
    /**
     * Index that produced the hit, wrapped in a Zend_Search_Lucene_Proxy.
     *
     * @var Zend_Search_Lucene_Interface
     */
    protected $_index = null;

    /**
     * Document associated with this hit; loaded on first access by
     * getDocument().
     *
     * @var Zend_Search_Lucene_Document
     */
    protected $_document = null;

    /**
     * Number of the document in the index.
     *
     * @var integer
     */
    public $id;

    /**
     * Score of the hit.
     *
     * @var float
     */
    public $score;

    /**
     * Constructor - takes the Zend_Search_Lucene_Interface index that produced
     * the hit so the document can be retrieved easily from the hit.
     *
     * @param Zend_Search_Lucene_Interface $index
     */
    public function __construct(Zend_Search_Lucene_Interface $index)
    {
        require_once 'Zend/Search/Lucene/Proxy.php';

        $proxy = new Zend_Search_Lucene_Proxy($index);
        $this->_index = $proxy;
    }

    /**
     * Convenience accessor: read a field value from the document
     * associated with this hit.
     *
     * @param string $offset  field name
     * @return string
     */
    public function __get($offset)
    {
        $document = $this->getDocument();

        return $document->getFieldValue($offset);
    }

    /**
     * Return the document object for this hit, fetching it from the index
     * by $id the first time it is requested.
     *
     * @return Zend_Search_Lucene_Document
     */
    public function getDocument()
    {
        if ($this->_document instanceof Zend_Search_Lucene_Document) {
            return $this->_document;
        }

        $this->_document = $this->_index->getDocument($this->id);

        return $this->_document;
    }

    /**
     * Return the index object for this hit.
     *
     * @return Zend_Search_Lucene_Interface
     */
    public function getIndex()
    {
        return $this->_index;
    }
}
|
||||
|
510
thirdparty/Zend/Search/Lucene/Search/QueryLexer.php
vendored
Normal file
510
thirdparty/Zend/Search/Lucene/Search/QueryLexer.php
vendored
Normal file
@ -0,0 +1,510 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryLexer.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryToken */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
|
||||
{
|
||||
/** State Machine states */
|
||||
const ST_WHITE_SPACE = 0;
|
||||
const ST_SYNT_LEXEME = 1;
|
||||
const ST_LEXEME = 2;
|
||||
const ST_QUOTED_LEXEME = 3;
|
||||
const ST_ESCAPED_CHAR = 4;
|
||||
const ST_ESCAPED_QCHAR = 5;
|
||||
const ST_LEXEME_MODIFIER = 6;
|
||||
const ST_NUMBER = 7;
|
||||
const ST_MANTISSA = 8;
|
||||
const ST_ERROR = 9;
|
||||
|
||||
/** Input symbols */
|
||||
const IN_WHITE_SPACE = 0;
|
||||
const IN_SYNT_CHAR = 1;
|
||||
const IN_LEXEME_MODIFIER = 2;
|
||||
const IN_ESCAPE_CHAR = 3;
|
||||
const IN_QUOTE = 4;
|
||||
const IN_DECIMAL_POINT = 5;
|
||||
const IN_ASCII_DIGIT = 6;
|
||||
const IN_CHAR = 7;
|
||||
const IN_MUTABLE_CHAR = 8;
|
||||
|
||||
const QUERY_WHITE_SPACE_CHARS = " \n\r\t";
|
||||
const QUERY_SYNT_CHARS = ':()[]{}!|&';
|
||||
const QUERY_MUTABLE_CHARS = '+-';
|
||||
const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
|
||||
const QUERY_LEXEMEMODIFIER_CHARS = '~^';
|
||||
const QUERY_ASCIIDIGITS_CHARS = '0123456789';
|
||||
|
||||
/**
|
||||
* List of recognized lexemes
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_lexemes;
|
||||
|
||||
/**
|
||||
* Query string (array of single- or non single-byte characters)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_queryString;
|
||||
|
||||
/**
|
||||
* Current position within a query string
|
||||
* Used to create appropriate error messages
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_queryStringPosition;
|
||||
|
||||
/**
|
||||
* Recognized part of current lexeme
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_currentLexeme;
|
||||
|
||||
public function __construct()
{
    // Register every state and input symbol with the parent state machine.
    $stateList = array(
        self::ST_WHITE_SPACE,
        self::ST_SYNT_LEXEME,
        self::ST_LEXEME,
        self::ST_QUOTED_LEXEME,
        self::ST_ESCAPED_CHAR,
        self::ST_ESCAPED_QCHAR,
        self::ST_LEXEME_MODIFIER,
        self::ST_NUMBER,
        self::ST_MANTISSA,
        self::ST_ERROR,
    );
    $inputList = array(
        self::IN_WHITE_SPACE,
        self::IN_SYNT_CHAR,
        self::IN_MUTABLE_CHAR,
        self::IN_LEXEME_MODIFIER,
        self::IN_ESCAPE_CHAR,
        self::IN_QUOTE,
        self::IN_DECIMAL_POINT,
        self::IN_ASCII_DIGIT,
        self::IN_CHAR,
    );
    parent::__construct($stateList, $inputList);

    // Actions attached to the transitions that lead into ST_ERROR.
    $modifierError    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
    $quoteInLexemeErr = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
    $numberError      = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');

    /*
     * Transition rules, one addRules() call per source state.
     * Each rule is array(source state, input symbol, target state[, action]).
     */

    // From ST_WHITE_SPACE
    $this->addRules(array(
        array(self::ST_WHITE_SPACE, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_WHITE_SPACE, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_WHITE_SPACE, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
        array(self::ST_WHITE_SPACE, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_WHITE_SPACE, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
        array(self::ST_WHITE_SPACE, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
        array(self::ST_WHITE_SPACE, self::IN_DECIMAL_POINT,   self::ST_LEXEME),
        array(self::ST_WHITE_SPACE, self::IN_ASCII_DIGIT,     self::ST_LEXEME),
        array(self::ST_WHITE_SPACE, self::IN_CHAR,            self::ST_LEXEME),
    ));

    // From ST_SYNT_LEXEME
    $this->addRules(array(
        array(self::ST_SYNT_LEXEME, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_SYNT_LEXEME, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_SYNT_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
        array(self::ST_SYNT_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_SYNT_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
        array(self::ST_SYNT_LEXEME, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
        array(self::ST_SYNT_LEXEME, self::IN_DECIMAL_POINT,   self::ST_LEXEME),
        array(self::ST_SYNT_LEXEME, self::IN_ASCII_DIGIT,     self::ST_LEXEME),
        array(self::ST_SYNT_LEXEME, self::IN_CHAR,            self::ST_LEXEME),
    ));

    // From ST_LEXEME (a quote inside a lexeme is an error)
    $this->addRules(array(
        array(self::ST_LEXEME, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_LEXEME, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
        array(self::ST_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
        array(self::ST_LEXEME, self::IN_QUOTE,           self::ST_ERROR, $quoteInLexemeErr),
        array(self::ST_LEXEME, self::IN_DECIMAL_POINT,   self::ST_LEXEME),
        array(self::ST_LEXEME, self::IN_ASCII_DIGIT,     self::ST_LEXEME),
        array(self::ST_LEXEME, self::IN_CHAR,            self::ST_LEXEME),
    ));

    // From ST_QUOTED_LEXEME (only the escape char and the closing quote leave the state)
    $this->addRules(array(
        array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_QCHAR),
        array(self::ST_QUOTED_LEXEME, self::IN_QUOTE,           self::ST_WHITE_SPACE),
        array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
        array(self::ST_QUOTED_LEXEME, self::IN_CHAR,            self::ST_QUOTED_LEXEME),
    ));

    // From ST_ESCAPED_CHAR: every input symbol leads to ST_LEXEME
    $escapedCharRules = array();
    foreach ($inputList as $inputSymbol) {
        $escapedCharRules[] = array(self::ST_ESCAPED_CHAR, $inputSymbol, self::ST_LEXEME);
    }
    $this->addRules($escapedCharRules);

    // From ST_ESCAPED_QCHAR: every input symbol leads back to ST_QUOTED_LEXEME
    $escapedQuotedRules = array();
    foreach ($inputList as $inputSymbol) {
        $escapedQuotedRules[] = array(self::ST_ESCAPED_QCHAR, $inputSymbol, self::ST_QUOTED_LEXEME);
    }
    $this->addRules($escapedQuotedRules);

    // From ST_LEXEME_MODIFIER (escape char, quote and ordinary chars are errors)
    $this->addRules(array(
        array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
        array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $modifierError),
        array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE,           self::ST_ERROR, $modifierError),
        array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
        array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
        array(self::ST_LEXEME_MODIFIER, self::IN_CHAR,            self::ST_ERROR, $modifierError),
    ));

    // From ST_NUMBER (escape char, quote and ordinary chars are errors)
    $this->addRules(array(
        array(self::ST_NUMBER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_NUMBER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_NUMBER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
        array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_NUMBER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $numberError),
        array(self::ST_NUMBER, self::IN_QUOTE,           self::ST_ERROR, $numberError),
        array(self::ST_NUMBER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
        array(self::ST_NUMBER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
        array(self::ST_NUMBER, self::IN_CHAR,            self::ST_ERROR, $numberError),
    ));

    // From ST_MANTISSA (as ST_NUMBER, but a second decimal point is an error too)
    $this->addRules(array(
        array(self::ST_MANTISSA, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
        array(self::ST_MANTISSA, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
        array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
        array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
        array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $numberError),
        array(self::ST_MANTISSA, self::IN_QUOTE,           self::ST_ERROR, $numberError),
        array(self::ST_MANTISSA, self::IN_DECIMAL_POINT,   self::ST_ERROR, $numberError),
        array(self::ST_MANTISSA, self::IN_ASCII_DIGIT,     self::ST_MANTISSA),
        array(self::ST_MANTISSA, self::IN_CHAR,            self::ST_ERROR, $numberError),
    ));

    /** Actions */
    $onSyntaxLexeme = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
    $onModifier     = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
    $onLexemeEnd    = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
    $onQuotedEnd    = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
    $onNumberEnd    = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
    $onLexemeChar   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');

    /** Syntax lexeme */
    $this->addEntryAction(self::ST_SYNT_LEXEME, $onSyntaxLexeme);
    // Two syntax lexemes in succession
    $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $onSyntaxLexeme);

    /** Lexeme */
    $this->addEntryAction(self::ST_LEXEME, $onLexemeChar);
    $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $onLexemeChar);
    // The ST_ESCAPED_CHAR => ST_LEXEME transition is covered by the ST_LEXEME entry action

    // Leaving ST_LEXEME for any of these states completes the lexeme
    $lexemeExits = array(
        self::ST_WHITE_SPACE,
        self::ST_SYNT_LEXEME,
        self::ST_QUOTED_LEXEME,
        self::ST_LEXEME_MODIFIER,
        self::ST_NUMBER,
        self::ST_MANTISSA,
    );
    foreach ($lexemeExits as $targetState) {
        $this->addTransitionAction(self::ST_LEXEME, $targetState, $onLexemeEnd);
    }

    /** Quoted lexeme */
    // No entry action is needed (the opening quote is skipped)
    $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $onLexemeChar);
    $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $onLexemeChar);
    // The closing quote changes state to ST_WHITE_SPACE; other target states are not used
    $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE, $onQuotedEnd);

    /** Lexeme modifier */
    $this->addEntryAction(self::ST_LEXEME_MODIFIER, $onModifier);

    /** Number */
    $this->addEntryAction(self::ST_NUMBER, $onLexemeChar);
    $this->addEntryAction(self::ST_MANTISSA, $onLexemeChar);
    $this->addTransitionAction(self::ST_NUMBER, self::ST_NUMBER, $onLexemeChar);
    // The ST_NUMBER => ST_MANTISSA transition is covered by the ST_MANTISSA entry action
    $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $onLexemeChar);

    // Leaving ST_NUMBER / ST_MANTISSA for any of these states completes the number
    $numberExits = array(
        self::ST_WHITE_SPACE,
        self::ST_SYNT_LEXEME,
        self::ST_LEXEME_MODIFIER,
    );
    foreach (array(self::ST_NUMBER, self::ST_MANTISSA) as $sourceState) {
        foreach ($numberExits as $targetState) {
            $this->addTransitionAction($sourceState, $targetState, $onNumberEnd);
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Translate input char to an input symbol of state machine
|
||||
*
|
||||
* @param string $char
|
||||
* @return integer
|
||||
*/
|
||||
private function _translateInput($char)
{
    // Character classes defined by the QUERY_* constants are checked first,
    // in this fixed order; the first matching class wins.
    if (strpos(self::QUERY_WHITE_SPACE_CHARS, $char) !== false) {
        return self::IN_WHITE_SPACE;
    }
    if (strpos(self::QUERY_SYNT_CHARS, $char) !== false) {
        return self::IN_SYNT_CHAR;
    }
    if (strpos(self::QUERY_MUTABLE_CHARS, $char) !== false) {
        return self::IN_MUTABLE_CHAR;
    }
    if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) {
        return self::IN_LEXEME_MODIFIER;
    }
    if (strpos(self::QUERY_ASCIIDIGITS_CHARS, $char) !== false) {
        return self::IN_ASCII_DIGIT;
    }

    // Single special characters.
    if ($char === '"') {
        return self::IN_QUOTE;
    }
    if ($char === '.') {
        return self::IN_DECIMAL_POINT;
    }
    if ($char === '\\') {
        return self::IN_ESCAPE_CHAR;
    }

    // Anything else is an ordinary lexeme character.
    return self::IN_CHAR;
}
|
||||
|
||||
|
||||
/**
|
||||
* This method is used to tokenize query string into lexemes
|
||||
*
|
||||
* @param string $inputString
|
||||
* @param string $encoding
|
||||
* @return array
|
||||
* @throws Zend_Search_Lucene_Search_QueryParserException
|
||||
*/
|
||||
public function tokenize($inputString, $encoding)
{
    // Tokenize a query string into lexemes: split it into characters, feed
    // each character's input symbol to the state machine, and return the
    // tokens collected by the state machine actions.
    $this->reset();

    $this->_lexemes     = array();
    $this->_queryString = array();

    // Start every run with an empty lexeme buffer. addLexeme() and
    // addQuotedLexeme() clear it only after emitting a token, so a previous
    // run aborted by an exception (e.g. the "Unexpected end of query" case
    // below for an unclosed quote) could otherwise leave characters behind
    // that would be prepended to the first lexeme of this query.
    $this->_currentLexeme = '';

    if (PHP_OS == 'AIX' && $encoding == '') {
        $encoding = 'ISO8859-1';
    }
    $strLength = iconv_strlen($inputString, $encoding);

    // Workaround for iconv_substr bug
    $inputString .= ' ';

    // Split the query into characters (single- or multi-byte) of the given encoding.
    for ($count = 0; $count < $strLength; $count++) {
        $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
    }

    // The position is an object property because the actions read it, and
    // addQuerySyntaxLexeme() advances it when it consumes a two-char lexeme.
    for ($this->_queryStringPosition = 0;
         $this->_queryStringPosition < count($this->_queryString);
         $this->_queryStringPosition++) {
        $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
    }

    // Feed a trailing white space so that a lexeme still in progress is completed.
    $this->process(self::IN_WHITE_SPACE);

    if ($this->getState() != self::ST_WHITE_SPACE) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
    }

    $this->_queryString = null;

    return $this->_lexemes;
}
|
||||
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Actions implementation
|
||||
*
|
||||
* Actions affect on recognized lexemes list
|
||||
*********************************************************************/
|
||||
|
||||
/**
|
||||
* Add query syntax lexeme
|
||||
*
|
||||
* @throws Zend_Search_Lucene_Search_QueryParserException
|
||||
*/
|
||||
public function addQuerySyntaxLexeme()
{
    $char   = $this->_queryString[$this->_queryStringPosition];
    $lexeme = $char;

    // '|' and '&' are only valid doubled ('||', '&&'): consume the next
    // character as well and require it to be the same one.
    if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $char) !== false) {
        $this->_queryStringPosition++;

        $atEndOfQuery = ($this->_queryStringPosition == count($this->_queryString));
        if ($atEndOfQuery || $this->_queryString[$this->_queryStringPosition] != $char) {
            require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
            throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
        }

        $lexeme = $char . $char;
    }

    $syntaxToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $lexeme,
        $this->_queryStringPosition
    );

    if ($syntaxToken->type != Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
        $this->_lexemes[] = $syntaxToken;
        return;
    }

    // Field indicator ':' is not stored itself; instead the preceding word
    // token is re-typed as a field name and put back on the list.
    $fieldToken = array_pop($this->_lexemes);
    if ($fieldToken === null || $fieldToken->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
    }

    $fieldToken->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
    $this->_lexemes[] = $fieldToken;
}
|
||||
|
||||
/**
 * Append a syntax element token for the lexeme modifier character
 * found at the current query string position.
 */
public function addLexemeModifier()
{
    $position = $this->_queryStringPosition;

    $modifierToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $this->_queryString[$position],
        $position
    );

    $this->_lexemes[] = $modifierToken;
}
|
||||
|
||||
|
||||
/**
 * Store the accumulated lexeme as a word token and clear the accumulator.
 *
 * The token position is recorded as one less than the current query string position.
 */
public function addLexeme()
{
    $wordToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_WORD,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );
    $this->_lexemes[] = $wordToken;

    $this->_currentLexeme = '';
}
|
||||
|
||||
/**
 * Store the accumulated lexeme as a phrase token and clear the accumulator.
 *
 * The token position is the current query string position.
 */
public function addQuotedLexeme()
{
    $phraseToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
        $this->_currentLexeme,
        $this->_queryStringPosition
    );
    $this->_lexemes[] = $phraseToken;

    $this->_currentLexeme = '';
}
|
||||
|
||||
/**
 * Store the accumulated lexeme as a number token and clear the accumulator.
 *
 * The token position is recorded as one less than the current query string position.
 */
public function addNumberLexeme()
{
    $numberToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );
    $this->_lexemes[] = $numberToken;

    $this->_currentLexeme = '';
}
|
||||
|
||||
/**
 * Append the character at the current query string position
 * to the lexeme being accumulated.
 */
public function addLexemeChar()
{
    $char = $this->_queryString[$this->_queryStringPosition];
    $this->_currentLexeme .= $char;
}
|
||||
|
||||
|
||||
/**
 * Build the "Position is N." suffix used by lexer error messages.
 *
 * @return string text naming the current query string position
 */
private function _positionMsg()
{
    $position = $this->_queryStringPosition;

    return 'Position is ' . $position . '.';
}
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Syntax errors actions
|
||||
*********************************************************************/
|
||||
/**
 * Error action: always throws, reporting an invalid character after a lexeme modifier.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function lexModifierErrException()
{
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
|
||||
/**
 * Error action: always throws, reporting an unescaped quote inside a lexeme.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function quoteWithinLexemeErrException()
{
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
|
||||
/**
 * Error action: always throws, reporting malformed number syntax.
 *
 * The message ends with the position suffix from _positionMsg(). A separating
 * space is included so the text reads "Wrong number syntax. Position is N."
 * like the other lexer error messages, rather than running the two sentences together.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function wrongNumberErrException()
{
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
    throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
}
|
||||
}
|
||||
|
635
thirdparty/Zend/Search/Lucene/Search/QueryParser.php
vendored
Normal file
635
thirdparty/Zend/Search/Lucene/Search/QueryParser.php
vendored
Normal file
@ -0,0 +1,635 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryParser.php 21638 2010-03-24 17:56:46Z alexander $
|
||||
*/
|
||||
|
||||
|
||||
/** Internally used classes */
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryToken */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
{
|
||||
/**
|
||||
* Parser instance
|
||||
*
|
||||
* @var Zend_Search_Lucene_Search_QueryParser
|
||||
*/
|
||||
private static $_instance = null;
|
||||
|
||||
|
||||
/**
|
||||
* Query lexer
|
||||
*
|
||||
* @var Zend_Search_Lucene_Search_QueryLexer
|
||||
*/
|
||||
private $_lexer;
|
||||
|
||||
/**
|
||||
* Tokens list
|
||||
* Array of Zend_Search_Lucene_Search_QueryToken objects
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_tokens;
|
||||
|
||||
/**
|
||||
* Current token
|
||||
*
|
||||
* @var integer|string
|
||||
*/
|
||||
private $_currentToken;
|
||||
|
||||
/**
|
||||
* Last token
|
||||
*
|
||||
* It can be processed within FSM states, but this additional state simplifies FSM
|
||||
*
|
||||
* @var Zend_Search_Lucene_Search_QueryToken
|
||||
*/
|
||||
private $_lastToken = null;
|
||||
|
||||
/**
|
||||
* Range query first term
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_rqFirstTerm = null;
|
||||
|
||||
/**
|
||||
* Current query parser context
|
||||
*
|
||||
* @var Zend_Search_Lucene_Search_QueryParserContext
|
||||
*/
|
||||
private $_context;
|
||||
|
||||
/**
|
||||
* Context stack
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_contextStack;
|
||||
|
||||
/**
|
||||
* Query string encoding
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_encoding;
|
||||
|
||||
/**
|
||||
* Query string default encoding
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_defaultEncoding = '';
|
||||
|
||||
/**
|
||||
* Defines query parsing mode.
|
||||
*
|
||||
* If this option is turned on, then the query parser suppresses query parser exceptions
|
||||
* and constructs multi-term query using all words from a query.
|
||||
*
|
||||
* That helps to avoid exceptions caused by queries, which don't conform to query language,
|
||||
* but limits possibilities to check, that query entered by user has some inconsistencies.
|
||||
*
|
||||
*
|
||||
* Default is true.
|
||||
*
|
||||
* Use {@link Zend_Search_Lucene::suppressQueryParsingExceptions()},
|
||||
* {@link Zend_Search_Lucene::dontSuppressQueryParsingExceptions()} and
|
||||
* {@link Zend_Search_Lucene::checkQueryParsingExceptionsSuppressMode()} to operate
|
||||
* with this setting.
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_suppressQueryParsingExceptions = true;
|
||||
|
||||
/**
|
||||
* Boolean operators constants
|
||||
*/
|
||||
const B_OR = 0;
|
||||
const B_AND = 1;
|
||||
|
||||
/**
|
||||
* Default boolean queries operator
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_defaultOperator = self::B_OR;
|
||||
|
||||
|
||||
/** Query parser State Machine states */
|
||||
const ST_COMMON_QUERY_ELEMENT = 0; // Terms, phrases, operators
|
||||
const ST_CLOSEDINT_RQ_START = 1; // Range query start (closed interval) - '['
|
||||
const ST_CLOSEDINT_RQ_FIRST_TERM = 2; // First term in '[term1 to term2]' construction
|
||||
const ST_CLOSEDINT_RQ_TO_TERM = 3; // 'TO' lexeme in '[term1 to term2]' construction
|
||||
const ST_CLOSEDINT_RQ_LAST_TERM = 4; // Second term in '[term1 to term2]' construction
|
||||
const ST_CLOSEDINT_RQ_END = 5; // Range query end (closed interval) - ']'
|
||||
const ST_OPENEDINT_RQ_START = 6; // Range query start (opened interval) - '{'
|
||||
const ST_OPENEDINT_RQ_FIRST_TERM = 7; // First term in '{term1 to term2}' construction
|
||||
const ST_OPENEDINT_RQ_TO_TERM = 8; // 'TO' lexeme in '{term1 to term2}' construction
|
||||
const ST_OPENEDINT_RQ_LAST_TERM = 9; // Second term in '{term1 to term2}' construction
|
||||
const ST_OPENEDINT_RQ_END = 10; // Range query end (opened interval) - '}'
|
||||
|
||||
/**
 * Parser constructor
 *
 * Declares the FSM states and the token types used as input symbols, registers
 * the transition rules (plain query elements plus the closed '[..]' and opened
 * '{..}' range query chains), binds input and state-entry actions to methods of
 * this object, and creates the query lexer.
 */
public function __construct()
{
    $common = self::ST_COMMON_QUERY_ELEMENT;

    parent::__construct(
        array(
            $common,
            self::ST_CLOSEDINT_RQ_START,
            self::ST_CLOSEDINT_RQ_FIRST_TERM,
            self::ST_CLOSEDINT_RQ_TO_TERM,
            self::ST_CLOSEDINT_RQ_LAST_TERM,
            self::ST_CLOSEDINT_RQ_END,
            self::ST_OPENEDINT_RQ_START,
            self::ST_OPENEDINT_RQ_FIRST_TERM,
            self::ST_OPENEDINT_RQ_TO_TERM,
            self::ST_OPENEDINT_RQ_LAST_TERM,
            self::ST_OPENEDINT_RQ_END
        ),
        Zend_Search_Lucene_Search_QueryToken::getTypes()
    );

    // Transitions out of the common state: array(input token type, target state).
    // Only the two range-start tokens leave the common state.
    $commonTransitions = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,             $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,           $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,            $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,         $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK,  $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK,    $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START, self::ST_CLOSEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START, self::ST_OPENEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,   $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,     $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,        $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,           $common)
    );
    $commonRules = array();
    foreach ($commonTransitions as $transition) {
        $commonRules[] = array($common, $transition[0], $transition[1]);
    }
    $this->addRules($commonRules);

    // Range query chains: start -> first term -> TO -> last term -> back to common.
    // Closed interval is registered first, then opened interval.
    $rangeChains = array(
        array(self::ST_CLOSEDINT_RQ_START,
              self::ST_CLOSEDINT_RQ_FIRST_TERM,
              self::ST_CLOSEDINT_RQ_TO_TERM,
              self::ST_CLOSEDINT_RQ_LAST_TERM,
              Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END),
        array(self::ST_OPENEDINT_RQ_START,
              self::ST_OPENEDINT_RQ_FIRST_TERM,
              self::ST_OPENEDINT_RQ_TO_TERM,
              self::ST_OPENEDINT_RQ_LAST_TERM,
              Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END)
    );
    foreach ($rangeChains as $chain) {
        list($startState, $firstTermState, $toState, $lastTermState, $endToken) = $chain;

        $this->addRules(array(
            array($startState,     Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $firstTermState),
            array($firstTermState, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME, $toState),
            array($toState,        Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $lastTermState),
            array($lastTermState,  $endToken,                                          $common)
        ));
    }

    // One action object per handler method; some are bound to several token types below
    $actions = array();
    $handlerMethods = array(
        'addTermEntry',
        'addPhraseEntry',
        'setField',
        'setSign',
        'processFuzzyProximityModifier',
        'processModifierParameter',
        'subqueryStart',
        'subqueryEnd',
        'logicalOperator',
        'openedRQFirstTerm',
        'openedRQLastTerm',
        'closedRQFirstTerm',
        'closedRQLastTerm'
    );
    foreach ($handlerMethods as $method) {
        $actions[$method] = new Zend_Search_Lucene_FSMAction($this, $method);
    }

    // Input actions for the common state: array(token type, handler method)
    $inputBindings = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,            'addTermEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,          'addPhraseEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,           'setField'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,        'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,      'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, 'processFuzzyProximityModifier'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,          'processModifierParameter'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,  'subqueryStart'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,    'subqueryEnd'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,      'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,       'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,      'logicalOperator')
    );
    foreach ($inputBindings as $binding) {
        $this->addInputAction($common, $binding[0], $actions[$binding[1]]);
    }

    // Entry actions for the range query term states: array(state, handler method)
    $entryBindings = array(
        array(self::ST_OPENEDINT_RQ_FIRST_TERM, 'openedRQFirstTerm'),
        array(self::ST_OPENEDINT_RQ_LAST_TERM,  'openedRQLastTerm'),
        array(self::ST_CLOSEDINT_RQ_FIRST_TERM, 'closedRQFirstTerm'),
        array(self::ST_CLOSEDINT_RQ_LAST_TERM,  'closedRQLastTerm')
    );
    foreach ($entryBindings as $binding) {
        $this->addEntryAction($binding[0], $actions[$binding[1]]);
    }

    require_once 'Zend/Search/Lucene/Search/QueryLexer.php';
    $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
}
|
||||
|
||||
/**
 * Return the shared parser instance, creating it on first use.
 *
 * @return Zend_Search_Lucene_Search_QueryParser
 */
private static function _getInstance()
{
    if (self::$_instance !== null) {
        return self::$_instance;
    }

    self::$_instance = new self();

    return self::$_instance;
}
|
||||
|
||||
/**
 * Set the default query string encoding on the shared parser instance.
 *
 * parse() uses this value when it is called without an explicit encoding.
 *
 * @param string $encoding
 */
public static function setDefaultEncoding($encoding)
{
    $parser = self::_getInstance();
    $parser->_defaultEncoding = $encoding;
}
|
||||
|
||||
/**
 * Get the default query string encoding stored on the shared parser instance.
 *
 * @return string
 */
public static function getDefaultEncoding()
{
    $parser = self::_getInstance();

    return $parser->_defaultEncoding;
}
|
||||
|
||||
/**
 * Set the default boolean operator on the shared parser instance.
 *
 * @param integer $operator one of the B_OR / B_AND class constants
 */
public static function setDefaultOperator($operator)
{
    $parser = self::_getInstance();
    $parser->_defaultOperator = $operator;
}
|
||||
|
||||
/**
 * Get the default boolean operator stored on the shared parser instance.
 *
 * @return integer
 */
public static function getDefaultOperator()
{
    $parser = self::_getInstance();

    return $parser->_defaultOperator;
}
|
||||
|
||||
/**
 * Turn on 'suppress query parser exceptions' mode on the shared parser instance.
 */
public static function suppressQueryParsingExceptions()
{
    $parser = self::_getInstance();
    $parser->_suppressQueryParsingExceptions = true;
}
|
||||
/**
 * Turn off 'suppress query parser exceptions' mode on the shared parser instance.
 */
public static function dontSuppressQueryParsingExceptions()
{
    $parser = self::_getInstance();
    $parser->_suppressQueryParsingExceptions = false;
}
|
||||
/**
 * Report whether 'suppress query parser exceptions' mode is currently on.
 *
 * @return boolean
 */
public static function queryParsingExceptionsSuppressed()
{
    $parser = self::_getInstance();

    return $parser->_suppressQueryParsingExceptions;
}
|
||||
|
||||
|
||||
/**
 * Escape a keyword so that it is parsed as one term.
 *
 * A backslash is placed in front of every element returned by str_split(),
 * i.e. in front of every byte of the keyword.
 *
 * NOTE(review): str_split() is byte-wise, so a multi-byte character gets a
 * backslash between its bytes. Confirm callers only pass single-byte text.
 *
 * @param string $keyword
 * @return string
 */
public static function escape($keyword)
{
    $pieces  = str_split($keyword);
    $escaped = implode('\\', $pieces);

    return '\\' . $escaped;
}
|
||||
|
||||
/**
 * Parses a query string
 *
 * The shared parser instance is reset, the string is tokenized by the query
 * lexer, and every token is fed through the state machine. An empty token list
 * yields an "insignificant" query.
 *
 * Query syntax errors (Zend_Search_Lucene_Search_QueryParserException) are handled
 * according to the 'suppress query parser exceptions' mode:
 *  - mode on:  the raw string is tokenized with the default analyzer and a
 *              multi-term query over all resulting terms is returned. Terms are
 *              required when the default operator is B_AND, optional otherwise;
 *  - mode off: the error is rethrown as Zend_Search_Lucene_Exception.
 *
 * Syntax errors raised by the parser's own actions while a token is processed
 * are passed to that same handling, so suppress mode covers them as well.
 * Any other exception raised while a token is processed is rethrown as
 * Zend_Search_Lucene_Exception regardless of the mode.
 *
 * @param string $strQuery
 * @param string $encoding  query string encoding; null selects the default encoding
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception
 */
public static function parse($strQuery, $encoding = null)
{
    $parser = self::_getInstance();

    // Reset FSM if previous parse operation didn't return it into a correct state
    $parser->reset();

    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
    try {
        require_once 'Zend/Search/Lucene/Search/QueryParserContext.php';

        $parser->_encoding     = ($encoding !== null) ? $encoding : $parser->_defaultEncoding;
        $parser->_lastToken    = null;
        $parser->_context      = new Zend_Search_Lucene_Search_QueryParserContext($parser->_encoding);
        $parser->_contextStack = array();
        $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

        // Empty query
        if (count($parser->_tokens) == 0) {
            require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        foreach ($parser->_tokens as $token) {
            try {
                $parser->_currentToken = $token;
                $parser->process($token->type);

                $parser->_lastToken = $token;
            } catch (Zend_Search_Lucene_Search_QueryParserException $e) {
                // Syntax error reported by an action: hand it to the outer handler
                // unchanged so that suppress mode applies to it
                throw $e;
            } catch (Exception $e) {
                // A missing FSM rule means the token is not allowed in the current state
                if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
                    throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.', 0, $e);
                }

                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            }
        }

        // A non-empty stack means some subquery was opened but never closed
        if (count($parser->_contextStack) != 0) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
        }

        return $parser->_context->getQuery();
    } catch (Zend_Search_Lucene_Search_QueryParserException $e) {
        if ($parser->_suppressQueryParsingExceptions) {
            $queryTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($strQuery, $parser->_encoding);

            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
            $termsSign = ($parser->_defaultOperator == self::B_AND) ? true /* required term */ :
                                                                      null /* optional term */;

            require_once 'Zend/Search/Lucene/Index/Term.php';
            foreach ($queryTokens as $token) {
                $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()), $termsSign);
            }

            return $query;
        } else {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }
}
|
||||
|
||||
/*********************************************************************
|
||||
* Actions implementation
|
||||
*
|
||||
* Actions apply the recognized tokens to the current query context
|
||||
*********************************************************************/
|
||||
|
||||
/**
 * Add a term entry for the current token to the current context.
 *
 * The entry is created with the field reported by the context.
 */
public function addTermEntry()
{
    require_once 'Zend/Search/Lucene/Search/QueryEntry/Term.php';

    $field = $this->_context->getField();
    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Term($this->_currentToken->text, $field)
    );
}
|
||||
|
||||
/**
 * Add a phrase entry for the current token to the current context.
 *
 * The entry is created with the field reported by the context.
 */
public function addPhraseEntry()
{
    require_once 'Zend/Search/Lucene/Search/QueryEntry/Phrase.php';

    $field = $this->_context->getField();
    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Phrase($this->_currentToken->text, $field)
    );
}
|
||||
|
||||
/**
 * Use the current token's text as the field of the next entry.
 */
public function setField()
{
    $fieldName = $this->_currentToken->text;
    $this->_context->setNextEntryField($fieldName);
}
|
||||
|
||||
/**
 * Pass the current token's type to the context as the sign of the next entry.
 */
public function setSign()
{
    $signType = $this->_currentToken->type;
    $this->_context->setNextEntrySign($signType);
}
|
||||
|
||||
|
||||
/**
 * Process fuzzy search/proximity modifier - '~'
 *
 * Delegates to the current context without passing a parameter.
 */
public function processFuzzyProximityModifier()
{
    $context = $this->_context;
    $context->processFuzzyProximityModifier();
}
|
||||
|
||||
/**
 * Process modifier parameter
 *
 * The current token's text is applied according to the type of the previous
 * token: after a fuzzy/proximity mark it is passed to the context's
 * fuzzy/proximity handling, after a boosting mark it is passed as the boost value.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException if there is no previous token
 * @throws Zend_Search_Lucene_Exception if the previous token is not a lexeme modifier
 */
public function processModifierParameter()
{
    if ($this->_lastToken === null) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
    }

    $previousType = $this->_lastToken->type;

    if ($previousType == Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK) {
        $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
        return;
    }

    if ($previousType == Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK) {
        $this->_context->boost($this->_currentToken->text);
        return;
    }

    // It's not a user input exception
    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
|
||||
|
||||
|
||||
/**
 * Start subquery
 *
 * Pushes the current context onto the context stack and replaces it with a
 * new context that uses the same encoding and the outer context's field.
 */
public function subqueryStart()
{
    require_once 'Zend/Search/Lucene/Search/QueryParserContext.php';

    $outerContext = $this->_context;
    $this->_contextStack[] = $outerContext;

    $this->_context = new Zend_Search_Lucene_Search_QueryParserContext(
        $this->_encoding,
        $outerContext->getField()
    );
}
|
||||
|
||||
/**
 * End subquery
 *
 * Builds the query of the current context, restores the enclosing context from
 * the stack and adds the built query to it as a subquery entry.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException if the context stack is empty
 */
public function subqueryEnd()
{
    $depth = count($this->_contextStack);
    if ($depth == 0) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
    }

    $subquery = $this->_context->getQuery();

    // Return to the enclosing context
    $this->_context = array_pop($this->_contextStack);

    require_once 'Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
    $subqueryEntry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery);
    $this->_context->addEntry($subqueryEntry);
}
|
||||
|
||||
/**
 * Process logical operator
 *
 * Passes the current token's type to the context as a logical operator.
 */
public function logicalOperator()
{
    $operatorType = $this->_currentToken->type;
    $this->_context->addLogicalOperator($operatorType);
}
|
||||
|
||||
/**
 * Process first range query term (opened interval)
 *
 * Remembers the current token's text until the last term is processed.
 */
public function openedRQFirstTerm()
{
    $firstTermText = $this->_currentToken->text;
    $this->_rqFirstTerm = $firstTermText;
}
|
||||
|
||||
/**
 * Process last range query term (opened interval)
 *
 * Adds a non-inclusive range query built from the remembered first term and
 * the current token to the current context.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function openedRQLastTerm()
{
    $this->_addRangeQueryEntry(false);
}

/**
 * Process first range query term (closed interval)
 *
 * Remembers the current token's text until the last term is processed.
 */
public function closedRQFirstTerm()
{
    $this->_rqFirstTerm = $this->_currentToken->text;
}

/**
 * Process last range query term (closed interval)
 *
 * Adds an inclusive range query built from the remembered first term and
 * the current token to the current context.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function closedRQLastTerm()
{
    $this->_addRangeQueryEntry(true);
}

/**
 * Convert range boundary text into an index term for the context's field.
 *
 * The text is tokenized with the default analyzer using the query encoding.
 *
 * @param string $text  boundary text as it appeared in the query
 * @return Zend_Search_Lucene_Index_Term|null  null when the analyzer yields no tokens
 * @throws Zend_Search_Lucene_Search_QueryParserException if the text yields more than one token
 */
private function _rangeBoundaryTerm($text)
{
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($text, $this->_encoding);
    $tokenCount = count($tokens);

    if ($tokenCount > 1) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
    }

    if ($tokenCount == 0) {
        return null;
    }

    require_once 'Zend/Search/Lucene/Index/Term.php';
    return new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
}

/**
 * Build a range query from the remembered first term and the current token
 * and add it to the current context as a subquery entry.
 *
 * The lower boundary is resolved before the upper one, so an invalid lower
 * boundary is reported first.
 *
 * @param boolean $inclusive  third argument handed to the range query (true for '[..]', false for '{..}')
 * @throws Zend_Search_Lucene_Search_QueryParserException if a boundary yields several
 *         tokens, or both boundaries are empty
 */
private function _addRangeQueryEntry($inclusive)
{
    $from = $this->_rangeBoundaryTerm($this->_rqFirstTerm);
    $to   = $this->_rangeBoundaryTerm($this->_currentToken->text);

    if ($from === null && $to === null) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
    }

    require_once 'Zend/Search/Lucene/Search/Query/Range.php';
    $rangeQuery = new Zend_Search_Lucene_Search_Query_Range($from, $to, $inclusive);

    require_once 'Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
    $entry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($rangeQuery);
    $this->_context->addEntry($entry);
}
|
||||
}
|
||||
|
401
thirdparty/Zend/Search/Lucene/Search/QueryParserContext.php
vendored
Normal file
401
thirdparty/Zend/Search/Lucene/Search/QueryParserContext.php
vendored
Normal file
@ -0,0 +1,401 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryParserContext.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryToken */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryParserContext
|
||||
{
|
||||
/**
|
||||
* Default field for the context.
|
||||
*
|
||||
* null means, that term should be searched through all fields
|
||||
* Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries to several
|
||||
*
|
||||
* @var string|null
|
||||
*/
|
||||
private $_defaultField;
|
||||
|
||||
/**
|
||||
* Field specified for next entry
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_nextEntryField = null;
|
||||
|
||||
/**
|
||||
* True means, that term is required.
|
||||
* False means, that term is prohibited.
|
||||
* null means, that term is neither prohibited, nor required
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_nextEntrySign = null;
|
||||
|
||||
|
||||
/**
|
||||
* Entries grouping mode
|
||||
*/
|
||||
const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
|
||||
const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
|
||||
|
||||
/**
|
||||
* Grouping mode
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_mode = null;
|
||||
|
||||
/**
|
||||
* Entries signs.
|
||||
* Used in GM_SIGNS grouping mode
|
||||
*
|
||||
* @var arrays
|
||||
*/
|
||||
private $_signs = array();
|
||||
|
||||
/**
|
||||
* Query entries
|
||||
* Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
|
||||
* boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_entries = array();
|
||||
|
||||
/**
|
||||
* Query string encoding
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_encoding;
|
||||
|
||||
|
||||
/**
|
||||
* Context object constructor
|
||||
*
|
||||
* @param string $encoding
|
||||
* @param string|null $defaultField
|
||||
*/
|
||||
public function __construct($encoding, $defaultField = null)
|
||||
{
|
||||
$this->_encoding = $encoding;
|
||||
$this->_defaultField = $defaultField;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get context default field
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
public function getField()
|
||||
{
|
||||
return ($this->_nextEntryField !== null) ? $this->_nextEntryField : $this->_defaultField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set field for next entry
|
||||
*
|
||||
* @param string $field
|
||||
*/
|
||||
public function setNextEntryField($field)
|
||||
{
|
||||
$this->_nextEntryField = $field;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set sign for next entry
|
||||
*
|
||||
* @param integer $sign
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function setNextEntrySign($sign)
|
||||
{
|
||||
if ($this->_mode === self::GM_BOOLEAN) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
|
||||
}
|
||||
|
||||
$this->_mode = self::GM_SIGNS;
|
||||
|
||||
if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
|
||||
$this->_nextEntrySign = true;
|
||||
} else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
|
||||
$this->_nextEntrySign = false;
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add entry to a query
|
||||
*
|
||||
* @param Zend_Search_Lucene_Search_QueryEntry $entry
|
||||
*/
|
||||
public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
|
||||
{
|
||||
if ($this->_mode !== self::GM_BOOLEAN) {
|
||||
$this->_signs[] = $this->_nextEntrySign;
|
||||
}
|
||||
|
||||
$this->_entries[] = $entry;
|
||||
|
||||
$this->_nextEntryField = null;
|
||||
$this->_nextEntrySign = null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process fuzzy search or proximity search modifier
|
||||
*
|
||||
* @throws Zend_Search_Lucene_Search_QueryParserException
|
||||
*/
|
||||
public function processFuzzyProximityModifier($parameter = null)
|
||||
{
|
||||
// Check, that modifier has came just after word or phrase
|
||||
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
|
||||
}
|
||||
|
||||
$lastEntry = array_pop($this->_entries);
|
||||
|
||||
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
// there are no entries or last entry is boolean operator
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
|
||||
}
|
||||
|
||||
$lastEntry->processFuzzyProximityModifier($parameter);
|
||||
|
||||
$this->_entries[] = $lastEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set boost factor to the entry
|
||||
*
|
||||
* @param float $boostFactor
|
||||
*/
|
||||
public function boost($boostFactor)
|
||||
{
|
||||
// Check, that modifier has came just after word or phrase
|
||||
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
|
||||
}
|
||||
|
||||
$lastEntry = array_pop($this->_entries);
|
||||
|
||||
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
// there are no entries or last entry is boolean operator
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
|
||||
}
|
||||
|
||||
$lastEntry->boost($boostFactor);
|
||||
|
||||
$this->_entries[] = $lastEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process logical operator
|
||||
*
|
||||
* @param integer $operator
|
||||
*/
|
||||
public function addLogicalOperator($operator)
|
||||
{
|
||||
if ($this->_mode === self::GM_SIGNS) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
|
||||
}
|
||||
|
||||
$this->_mode = self::GM_BOOLEAN;
|
||||
|
||||
$this->_entries[] = $operator;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate 'signs style' query from the context
|
||||
* '+term1 term2 -term3 +(<subquery1>) ...'
|
||||
*
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
public function _signStyleExpressionQuery()
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Boolean();
|
||||
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
|
||||
if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
|
||||
$defaultSign = true; // required
|
||||
} else {
|
||||
// Zend_Search_Lucene_Search_QueryParser::B_OR
|
||||
$defaultSign = null; // optional
|
||||
}
|
||||
|
||||
foreach ($this->_entries as $entryId => $entry) {
|
||||
$sign = ($this->_signs[$entryId] !== null) ? $this->_signs[$entryId] : $defaultSign;
|
||||
$query->addSubquery($entry->getQuery($this->_encoding), $sign);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate 'boolean style' query from the context
|
||||
* 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
|
||||
*
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
* @throws Zend_Search_Lucene
|
||||
*/
|
||||
private function _booleanExpressionQuery()
|
||||
{
|
||||
/**
|
||||
* We treat each level of an expression as a boolean expression in
|
||||
* a Disjunctive Normal Form
|
||||
*
|
||||
* AND operator has higher precedence than OR
|
||||
*
|
||||
* Thus logical query is a disjunction of one or more conjunctions of
|
||||
* one or more query entries
|
||||
*/
|
||||
|
||||
require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
|
||||
$expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
|
||||
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
foreach ($this->_entries as $entry) {
|
||||
if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
$expressionRecognizer->processLiteral($entry);
|
||||
} else {
|
||||
switch ($entry) {
|
||||
case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
|
||||
$expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
|
||||
break;
|
||||
|
||||
case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
|
||||
$expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
|
||||
break;
|
||||
|
||||
case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
|
||||
$expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Zend_Search_Lucene('Boolean expression error. Unknown operator type.');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$conjuctions = $expressionRecognizer->finishExpression();
|
||||
} catch (Zend_Search_Exception $e) {
|
||||
// throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
|
||||
// $e->getMessage() . '\'.' );
|
||||
// It's query syntax error message and it should be user friendly. So FSM message is omitted
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
|
||||
}
|
||||
|
||||
// Remove 'only negative' conjunctions
|
||||
foreach ($conjuctions as $conjuctionId => $conjuction) {
|
||||
$nonNegativeEntryFound = false;
|
||||
|
||||
foreach ($conjuction as $conjuctionEntry) {
|
||||
if ($conjuctionEntry[1]) {
|
||||
$nonNegativeEntryFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$nonNegativeEntryFound) {
|
||||
unset($conjuctions[$conjuctionId]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$subqueries = array();
|
||||
foreach ($conjuctions as $conjuction) {
|
||||
// Check, if it's a one term conjuction
|
||||
if (count($conjuction) == 1) {
|
||||
$subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
$subquery = new Zend_Search_Lucene_Search_Query_Boolean();
|
||||
|
||||
foreach ($conjuction as $conjuctionEntry) {
|
||||
$subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
|
||||
}
|
||||
|
||||
$subqueries[] = $subquery;
|
||||
}
|
||||
}
|
||||
|
||||
if (count($subqueries) == 0) {
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
}
|
||||
|
||||
if (count($subqueries) == 1) {
|
||||
return $subqueries[0];
|
||||
}
|
||||
|
||||
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
$query = new Zend_Search_Lucene_Search_Query_Boolean();
|
||||
|
||||
foreach ($subqueries as $subquery) {
|
||||
// Non-requirered entry/subquery
|
||||
$query->addSubquery($subquery);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate query from current context
|
||||
*
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
public function getQuery()
|
||||
{
|
||||
if ($this->_mode === self::GM_BOOLEAN) {
|
||||
return $this->_booleanExpressionQuery();
|
||||
} else {
|
||||
return $this->_signStyleExpressionQuery();
|
||||
}
|
||||
}
|
||||
}
|
41
thirdparty/Zend/Search/Lucene/Search/QueryParserException.php
vendored
Normal file
41
thirdparty/Zend/Search/Lucene/Search/QueryParserException.php
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryParserException.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene base exception
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*
|
||||
* Special exception type, which may be used to intercept wrong user input
|
||||
*/
|
||||
// Marker exception type: adds nothing to its parent, it only lets callers
// catch query-parsing failures separately from other Lucene exceptions.
class Zend_Search_Lucene_Search_QueryParserException extends Zend_Search_Lucene_Exception
{
}
|
||||
|
225
thirdparty/Zend/Search/Lucene/Search/QueryToken.php
vendored
Normal file
225
thirdparty/Zend/Search/Lucene/Search/QueryToken.php
vendored
Normal file
@ -0,0 +1,225 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: QueryToken.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     */
    const TT_WORD                 = 0;  // Word
    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR      = 3;  // ':'
    const TT_REQUIRED             = 4;  // '+'
    const TT_PROHIBITED           = 5;  // '-'
    const TT_FUZZY_PROX_MARK      = 6;  // '~'
    const TT_BOOSTING_MARK        = 7;  // '^'
    const TT_RANGE_INCL_START     = 8;  // '['
    const TT_RANGE_INCL_END       = 9;  // ']'
    const TT_RANGE_EXCL_START     = 10; // '{'
    const TT_RANGE_EXCL_END       = 11; // '}'
    const TT_SUBQUERY_START       = 12; // '('
    const TT_SUBQUERY_END         = 13; // ')'
    const TT_AND_LEXEME           = 14; // 'AND' or 'and'
    const TT_OR_LEXEME            = 15; // 'OR'  or 'or'
    const TT_NOT_LEXEME           = 16; // 'NOT' or 'not'
    const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....


    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array
     */
    public static function getTypes()
    {
        return array(   self::TT_WORD,
                        self::TT_PHRASE,
                        self::TT_FIELD,
                        self::TT_FIELD_INDICATOR,
                        self::TT_REQUIRED,
                        self::TT_PROHIBITED,
                        self::TT_FUZZY_PROX_MARK,
                        self::TT_BOOSTING_MARK,
                        self::TT_RANGE_INCL_START,
                        self::TT_RANGE_INCL_END,
                        self::TT_RANGE_EXCL_START,
                        self::TT_RANGE_EXCL_END,
                        self::TT_SUBQUERY_START,
                        self::TT_SUBQUERY_END,
                        self::TT_AND_LEXEME,
                        self::TT_OR_LEXEME,
                        self::TT_NOT_LEXEME,
                        self::TT_TO_LEXEME,
                        self::TT_NUMBER
                     );
    }


    /**
     * TokenCategories
     */
    const TC_WORD           = 0;  // Word
    const TC_PHRASE         = 1;  // Phrase (one or several quoted words)
    const TC_NUMBER         = 2;  // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3;  // +  -  ( )  [ ]  { }  !  ||  && ~ ^


    /**
     * Words (lower-cased) that are operators rather than plain words.
     *
     * @var array
     */
    private static $_wordLexemes = array(
        'and' => self::TT_AND_LEXEME,
        'or'  => self::TT_OR_LEXEME,
        'not' => self::TT_NOT_LEXEME,
        'to'  => self::TT_TO_LEXEME
    );

    /**
     * Syntax element text => token type.
     *
     * @var array
     */
    private static $_syntaxLexemes = array(
        ':'  => self::TT_FIELD_INDICATOR,
        '+'  => self::TT_REQUIRED,
        '-'  => self::TT_PROHIBITED,
        '~'  => self::TT_FUZZY_PROX_MARK,
        '^'  => self::TT_BOOSTING_MARK,
        '['  => self::TT_RANGE_INCL_START,
        ']'  => self::TT_RANGE_INCL_END,
        '{'  => self::TT_RANGE_EXCL_START,
        '}'  => self::TT_RANGE_EXCL_END,
        '('  => self::TT_SUBQUERY_START,
        ')'  => self::TT_SUBQUERY_END,
        '!'  => self::TT_NOT_LEXEME,
        '&&' => self::TT_AND_LEXEME,
        '||' => self::TT_OR_LEXEME
    );


    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (1-based).
     *
     * @var integer
     */
    public $position;


    /**
     * Token constructor: derives the token type from its category and text.
     *
     * Words are matched case-insensitively against the operator words
     * (and/or/not/to); syntax elements are matched exactly.
     * Assumes $tokenText is a string, as produced by the lexer - TODO confirm.
     *
     * @param integer $tokenCategory  one of the TC_* constants
     * @param string  $tokenText
     * @param integer $position       0-based offset; stored 1-based
     * @throws Zend_Search_Lucene_Exception  on an unknown category or syntax element
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Lower-case once instead of once per comparison
                $lowered    = strtolower($tokenText);
                $this->type = isset(self::$_wordLexemes[$lowered]) ? self::$_wordLexemes[$lowered]
                                                                   : self::TT_WORD;
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                if (!isset(self::$_syntaxLexemes[$tokenText])) {
                    require_once 'Zend/Search/Lucene/Exception.php';
                    throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                $this->type = self::$_syntaxLexemes[$tokenText];
                break;

            // (A second, unreachable 'case self::TC_NUMBER' without a break used to
            //  sit here and would have fallen through into the error below.)
            default:
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}
|
551
thirdparty/Zend/Search/Lucene/Search/Similarity.php
vendored
Normal file
551
thirdparty/Zend/Search/Lucene/Search/Similarity.php
vendored
Normal file
@ -0,0 +1,551 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Similarity.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Similarity
|
||||
{
|
||||
/**
|
||||
* The Similarity implementation used by default.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Search_Similarity
|
||||
*/
|
||||
private static $_defaultImpl;
|
||||
|
||||
/**
|
||||
* Cache of decoded bytes.
|
||||
* Array of floats
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private static $_normTable = array( 0 => 0.0,
|
||||
1 => 5.820766E-10,
|
||||
2 => 6.9849193E-10,
|
||||
3 => 8.1490725E-10,
|
||||
4 => 9.313226E-10,
|
||||
5 => 1.1641532E-9,
|
||||
6 => 1.3969839E-9,
|
||||
7 => 1.6298145E-9,
|
||||
8 => 1.8626451E-9,
|
||||
9 => 2.3283064E-9,
|
||||
10 => 2.7939677E-9,
|
||||
11 => 3.259629E-9,
|
||||
12 => 3.7252903E-9,
|
||||
13 => 4.656613E-9,
|
||||
14 => 5.5879354E-9,
|
||||
15 => 6.519258E-9,
|
||||
16 => 7.4505806E-9,
|
||||
17 => 9.313226E-9,
|
||||
18 => 1.1175871E-8,
|
||||
19 => 1.3038516E-8,
|
||||
20 => 1.4901161E-8,
|
||||
21 => 1.8626451E-8,
|
||||
22 => 2.2351742E-8,
|
||||
23 => 2.6077032E-8,
|
||||
24 => 2.9802322E-8,
|
||||
25 => 3.7252903E-8,
|
||||
26 => 4.4703484E-8,
|
||||
27 => 5.2154064E-8,
|
||||
28 => 5.9604645E-8,
|
||||
29 => 7.4505806E-8,
|
||||
30 => 8.940697E-8,
|
||||
31 => 1.0430813E-7,
|
||||
32 => 1.1920929E-7,
|
||||
33 => 1.4901161E-7,
|
||||
34 => 1.7881393E-7,
|
||||
35 => 2.0861626E-7,
|
||||
36 => 2.3841858E-7,
|
||||
37 => 2.9802322E-7,
|
||||
38 => 3.5762787E-7,
|
||||
39 => 4.172325E-7,
|
||||
40 => 4.7683716E-7,
|
||||
41 => 5.9604645E-7,
|
||||
42 => 7.1525574E-7,
|
||||
43 => 8.34465E-7,
|
||||
44 => 9.536743E-7,
|
||||
45 => 1.1920929E-6,
|
||||
46 => 1.4305115E-6,
|
||||
47 => 1.66893E-6,
|
||||
48 => 1.9073486E-6,
|
||||
49 => 2.3841858E-6,
|
||||
50 => 2.861023E-6,
|
||||
51 => 3.33786E-6,
|
||||
52 => 3.8146973E-6,
|
||||
53 => 4.7683716E-6,
|
||||
54 => 5.722046E-6,
|
||||
55 => 6.67572E-6,
|
||||
56 => 7.6293945E-6,
|
||||
57 => 9.536743E-6,
|
||||
58 => 1.1444092E-5,
|
||||
59 => 1.335144E-5,
|
||||
60 => 1.5258789E-5,
|
||||
61 => 1.9073486E-5,
|
||||
62 => 2.2888184E-5,
|
||||
63 => 2.670288E-5,
|
||||
64 => 3.0517578E-5,
|
||||
65 => 3.8146973E-5,
|
||||
66 => 4.5776367E-5,
|
||||
67 => 5.340576E-5,
|
||||
68 => 6.1035156E-5,
|
||||
69 => 7.6293945E-5,
|
||||
70 => 9.1552734E-5,
|
||||
71 => 1.0681152E-4,
|
||||
72 => 1.2207031E-4,
|
||||
73 => 1.5258789E-4,
|
||||
74 => 1.8310547E-4,
|
||||
75 => 2.1362305E-4,
|
||||
76 => 2.4414062E-4,
|
||||
77 => 3.0517578E-4,
|
||||
78 => 3.6621094E-4,
|
||||
79 => 4.272461E-4,
|
||||
80 => 4.8828125E-4,
|
||||
81 => 6.1035156E-4,
|
||||
82 => 7.324219E-4,
|
||||
83 => 8.544922E-4,
|
||||
84 => 9.765625E-4,
|
||||
85 => 0.0012207031,
|
||||
86 => 0.0014648438,
|
||||
87 => 0.0017089844,
|
||||
88 => 0.001953125,
|
||||
89 => 0.0024414062,
|
||||
90 => 0.0029296875,
|
||||
91 => 0.0034179688,
|
||||
92 => 0.00390625,
|
||||
93 => 0.0048828125,
|
||||
94 => 0.005859375,
|
||||
95 => 0.0068359375,
|
||||
96 => 0.0078125,
|
||||
97 => 0.009765625,
|
||||
98 => 0.01171875,
|
||||
99 => 0.013671875,
|
||||
100 => 0.015625,
|
||||
101 => 0.01953125,
|
||||
102 => 0.0234375,
|
||||
103 => 0.02734375,
|
||||
104 => 0.03125,
|
||||
105 => 0.0390625,
|
||||
106 => 0.046875,
|
||||
107 => 0.0546875,
|
||||
108 => 0.0625,
|
||||
109 => 0.078125,
|
||||
110 => 0.09375,
|
||||
111 => 0.109375,
|
||||
112 => 0.125,
|
||||
113 => 0.15625,
|
||||
114 => 0.1875,
|
||||
115 => 0.21875,
|
||||
116 => 0.25,
|
||||
117 => 0.3125,
|
||||
118 => 0.375,
|
||||
119 => 0.4375,
|
||||
120 => 0.5,
|
||||
121 => 0.625,
|
||||
122 => 0.75,
|
||||
123 => 0.875,
|
||||
124 => 1.0,
|
||||
125 => 1.25,
|
||||
126 => 1.5,
|
||||
127 => 1.75,
|
||||
128 => 2.0,
|
||||
129 => 2.5,
|
||||
130 => 3.0,
|
||||
131 => 3.5,
|
||||
132 => 4.0,
|
||||
133 => 5.0,
|
||||
134 => 6.0,
|
||||
135 => 7.0,
|
||||
136 => 8.0,
|
||||
137 => 10.0,
|
||||
138 => 12.0,
|
||||
139 => 14.0,
|
||||
140 => 16.0,
|
||||
141 => 20.0,
|
||||
142 => 24.0,
|
||||
143 => 28.0,
|
||||
144 => 32.0,
|
||||
145 => 40.0,
|
||||
146 => 48.0,
|
||||
147 => 56.0,
|
||||
148 => 64.0,
|
||||
149 => 80.0,
|
||||
150 => 96.0,
|
||||
151 => 112.0,
|
||||
152 => 128.0,
|
||||
153 => 160.0,
|
||||
154 => 192.0,
|
||||
155 => 224.0,
|
||||
156 => 256.0,
|
||||
157 => 320.0,
|
||||
158 => 384.0,
|
||||
159 => 448.0,
|
||||
160 => 512.0,
|
||||
161 => 640.0,
|
||||
162 => 768.0,
|
||||
163 => 896.0,
|
||||
164 => 1024.0,
|
||||
165 => 1280.0,
|
||||
166 => 1536.0,
|
||||
167 => 1792.0,
|
||||
168 => 2048.0,
|
||||
169 => 2560.0,
|
||||
170 => 3072.0,
|
||||
171 => 3584.0,
|
||||
172 => 4096.0,
|
||||
173 => 5120.0,
|
||||
174 => 6144.0,
|
||||
175 => 7168.0,
|
||||
176 => 8192.0,
|
||||
177 => 10240.0,
|
||||
178 => 12288.0,
|
||||
179 => 14336.0,
|
||||
180 => 16384.0,
|
||||
181 => 20480.0,
|
||||
182 => 24576.0,
|
||||
183 => 28672.0,
|
||||
184 => 32768.0,
|
||||
185 => 40960.0,
|
||||
186 => 49152.0,
|
||||
187 => 57344.0,
|
||||
188 => 65536.0,
|
||||
189 => 81920.0,
|
||||
190 => 98304.0,
|
||||
191 => 114688.0,
|
||||
192 => 131072.0,
|
||||
193 => 163840.0,
|
||||
194 => 196608.0,
|
||||
195 => 229376.0,
|
||||
196 => 262144.0,
|
||||
197 => 327680.0,
|
||||
198 => 393216.0,
|
||||
199 => 458752.0,
|
||||
200 => 524288.0,
|
||||
201 => 655360.0,
|
||||
202 => 786432.0,
|
||||
203 => 917504.0,
|
||||
204 => 1048576.0,
|
||||
205 => 1310720.0,
|
||||
206 => 1572864.0,
|
||||
207 => 1835008.0,
|
||||
208 => 2097152.0,
|
||||
209 => 2621440.0,
|
||||
210 => 3145728.0,
|
||||
211 => 3670016.0,
|
||||
212 => 4194304.0,
|
||||
213 => 5242880.0,
|
||||
214 => 6291456.0,
|
||||
215 => 7340032.0,
|
||||
216 => 8388608.0,
|
||||
217 => 1.048576E7,
|
||||
218 => 1.2582912E7,
|
||||
219 => 1.4680064E7,
|
||||
220 => 1.6777216E7,
|
||||
221 => 2.097152E7,
|
||||
222 => 2.5165824E7,
|
||||
223 => 2.9360128E7,
|
||||
224 => 3.3554432E7,
|
||||
225 => 4.194304E7,
|
||||
226 => 5.0331648E7,
|
||||
227 => 5.8720256E7,
|
||||
228 => 6.7108864E7,
|
||||
229 => 8.388608E7,
|
||||
230 => 1.00663296E8,
|
||||
231 => 1.17440512E8,
|
||||
232 => 1.34217728E8,
|
||||
233 => 1.6777216E8,
|
||||
234 => 2.01326592E8,
|
||||
235 => 2.34881024E8,
|
||||
236 => 2.68435456E8,
|
||||
237 => 3.3554432E8,
|
||||
238 => 4.02653184E8,
|
||||
239 => 4.69762048E8,
|
||||
240 => 5.3687091E8,
|
||||
241 => 6.7108864E8,
|
||||
242 => 8.0530637E8,
|
||||
243 => 9.395241E8,
|
||||
244 => 1.07374182E9,
|
||||
245 => 1.34217728E9,
|
||||
246 => 1.61061274E9,
|
||||
247 => 1.87904819E9,
|
||||
248 => 2.14748365E9,
|
||||
249 => 2.68435456E9,
|
||||
250 => 3.22122547E9,
|
||||
251 => 3.75809638E9,
|
||||
252 => 4.2949673E9,
|
||||
253 => 5.3687091E9,
|
||||
254 => 6.4424509E9,
|
||||
255 => 7.5161928E9 );
|
||||
|
||||
|
||||
/**
 * Replace the class-wide default Similarity implementation.
 *
 * The given object is stored in the static default slot and is what
 * getDefault() returns from then on.
 *
 * @param Zend_Search_Lucene_Search_Similarity $similarity  implementation to use as the default
 */
public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
{
    self::$_defaultImpl = $similarity;
}
|
||||
|
||||
|
||||
/**
 * Fetch the class-wide default Similarity implementation.
 *
 * When no Similarity instance has been stored yet, a
 * Zend_Search_Lucene_Search_Similarity_Default is loaded, created, stored
 * and returned.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public static function getDefault()
{
    // Fast path: a default has already been set or created
    if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
        return self::$_defaultImpl;
    }

    require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
    self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

    return self::$_defaultImpl;
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes the normalization value for a field given the total number of
|
||||
* terms contained in a field. These values, together with field boosts, are
|
||||
* stored in an index and multiplied into scores for hits on each field by the
|
||||
* search code.
|
||||
*
|
||||
* Matches in longer fields are less precise, so implementations of this
|
||||
* method usually return smaller values when 'numTokens' is large,
|
||||
* and larger values when 'numTokens' is small.
|
||||
*
|
||||
* Note that these values are computed under
|
||||
* IndexWriter::addDocument(Document) and stored then using
|
||||
* encodeNorm(float). Thus they have limited precision, and documents
|
||||
* must be re-indexed if this method is altered.
|
||||
*
|
||||
* fieldName - name of field
|
||||
* numTokens - the total number of tokens contained in fields named
|
||||
* 'fieldName' of 'doc'.
|
||||
* Returns a normalization factor for hits on this field of this document
|
||||
*
|
||||
* @param string $fieldName
|
||||
* @param integer $numTokens
|
||||
* @return float
|
||||
*/
|
||||
abstract public function lengthNorm($fieldName, $numTokens);
|
||||
|
||||
/**
|
||||
* Computes the normalization value for a query given the sum of the squared
|
||||
* weights of each of the query terms. This value is then multiplied into the
|
||||
* weight of each query term.
|
||||
*
|
||||
* This does not affect ranking, but rather just attempts to make scores
|
||||
* from different queries comparable.
|
||||
*
|
||||
* sumOfSquaredWeights - the sum of the squares of query term weights
|
||||
* Returns a normalization factor for query weights
|
||||
*
|
||||
* @param float $sumOfSquaredWeights
|
||||
* @return float
|
||||
*/
|
||||
abstract public function queryNorm($sumOfSquaredWeights);
|
||||
|
||||
|
||||
/**
 * Decode a stored normalization byte into its float value.
 *
 * Only the low 8 bits of $byte are used, so the lookup index always falls
 * within the 0..255 range of the norm table.
 *
 * @param integer $byte
 * @return float
 */
public static function decodeNorm($byte)
{
    $tableIndex = $byte & 0xFF;

    return self::$_normTable[$tableIndex];
}
|
||||
|
||||
|
||||
/**
 * Encode a normalization factor as a single byte for storage in an index.
 *
 * Documented encoding contract: a five-bit exponent and three-bit mantissa,
 * covering values from around 7x10^9 down to 2x10^-9 with about one
 * significant decimal digit of accuracy. Zero is representable; negative
 * numbers are rounded up to zero; values too large to represent are rounded
 * down to the largest representable value; positive values too small to
 * represent are rounded up to the smallest positive representable value.
 *
 * The conversion itself is delegated to _floatToByte().
 *
 * @param float $f
 * @return integer
 */
public static function encodeNorm($f)
{
    $encoded = self::_floatToByte($f);

    return $encoded;
}
|
||||
|
||||
/**
 * Float to byte conversion.
 *
 * Finds the norm table index whose value is closest to $f:
 *  - zero and negative values map to 0;
 *  - values above the largest table entry map to 255;
 *  - positive values never map to 0: a positive value too small to
 *    represent is rounded up to index 1 (the smallest positive entry),
 *    as the encodeNorm() contract requires.
 *
 * @param float $f
 * @return integer  table index in the range 0..255
 */
private static function _floatToByte($f)
{
    // round negatives up to zero
    if ($f <= 0.0) {
        return 0;
    }

    // binary search for an exact match in the (ascending) norm table
    $lowIndex  = 0;
    $highIndex = 255;
    while ($highIndex >= $lowIndex) {
        $mid   = ($highIndex + $lowIndex) >> 1;
        $delta = $f - self::$_normTable[$mid];

        if ($delta < 0) {
            $highIndex = $mid-1;
        } elseif ($delta > 0) {
            $lowIndex  = $mid+1;
        } else {
            return $mid; // We got it!
        }
    }

    // No exact match: $highIndex is now the largest index whose value is below $f.
    // Round to the closest of the two neighbours.
    if ($highIndex != 255 &&
        $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex+1] - $f ) {
        $highIndex++;
    }

    // Positive underflow: index 0 stands for exactly zero, so a positive value
    // that rounded down to it is bumped to the smallest positive entry instead.
    if ($highIndex == 0) {
        return 1;
    }

    return $highIndex;
}
|
||||
|
||||
|
||||
/**
 * Computes a score factor based on the frequency of a term or phrase in a
 * document. This value is multiplied by the idf(Term, Searcher) factor for
 * each term in the query, and these products are then summed to form the
 * initial score for a document.
 *
 * Terms and phrases repeated in a document indicate the topic of the
 * document, so implementations of this method usually return larger values
 * when 'freq' is large, and smaller values when 'freq' is small.
 *
 * @param float $freq the frequency of a term within a document
 * @return float a score factor based on a term's within-document frequency
 */
abstract public function tf($freq);
|
||||
|
||||
/**
 * Computes the amount of a sloppy phrase match, based on an edit distance.
 * This value is summed for each sloppy phrase match in a document to form
 * the frequency that is passed to tf(float).
 *
 * A phrase match with a small edit distance to a document passage more
 * closely matches the document, so implementations of this method usually
 * return larger values when the edit distance is small and smaller values
 * when it is large.
 *
 * @param integer $distance the edit distance of this sloppy phrase match
 * @return float the frequency increment for this match
 */
abstract public function sloppyFreq($distance);
|
||||
|
||||
|
||||
/**
 * Computes a score factor for a simple term or a phrase.
 *
 * For a single term this is idfFreq() applied to the term's document
 * frequency and the reader's document count. For an array of terms it is
 * the sum of those per-term values (0.0 for an empty array).
 *
 * @param mixed $input the term in question or array of terms
 * @param Zend_Search_Lucene_Interface $reader the document collection being searched
 * @return float a score factor for the term
 */
public function idf($input, Zend_Search_Lucene_Interface $reader)
{
    // Single term: its own idf is the result.
    if (!is_array($input)) {
        return $this->idfFreq($reader->docFreq($input), $reader->count());
    }

    // Several terms: accumulate the idf of each one.
    $total = 0.0;
    foreach ($input as $singleTerm) {
        $total += $this->idfFreq($reader->docFreq($singleTerm), $reader->count());
    }

    return $total;
}
|
||||
|
||||
/**
 * Computes a score factor based on a term's document frequency (the number
 * of documents which contain the term). This value is multiplied by the
 * tf(int) factor for each term in the query, and these products are then
 * summed to form the initial score for a document.
 *
 * Terms that occur in fewer documents are better indicators of topic, so
 * implementations of this method usually return larger values for rare
 * terms, and smaller values for common terms.
 *
 * @param integer $docFreq the number of documents which contain the term
 * @param integer $numDocs the total number of documents in the collection
 * @return float a score factor based on the term's document frequency
 */
abstract public function idfFreq($docFreq, $numDocs);
|
||||
|
||||
/**
 * Computes a score factor based on the fraction of all query terms that a
 * document contains. This value is multiplied into scores.
 *
 * The presence of a large portion of the query terms indicates a better
 * match with the query, so implementations of this method usually return
 * larger values when the ratio between these parameters is large and
 * smaller values when the ratio between them is small.
 *
 * @param integer $overlap the number of query terms matched in the document
 * @param integer $maxOverlap the total number of terms in the query
 * @return float a score factor based on term overlap with the query
 */
abstract public function coord($overlap, $maxOverlap);
|
||||
}
|
||||
|
110
thirdparty/Zend/Search/Lucene/Search/Similarity/Default.php
vendored
Normal file
110
thirdparty/Zend/Search/Lucene/Search/Similarity/Default.php
vendored
Normal file
@ -0,0 +1,110 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Default.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Similarity */
|
||||
require_once 'Zend/Search/Lucene/Search/Similarity.php';
|
||||
|
||||
|
||||
/**
 * Default similarity: concrete formulas for the scoring factors declared by
 * Zend_Search_Lucene_Search_Similarity.
 *
 * Every formula that divides guards its zero denominator explicitly, so the
 * result does not depend on how the running PHP version treats division by
 * zero.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
{

    /**
     * Implemented as '1/sqrt(numTerms)'.
     *
     * A field with no terms yields 1E10 instead of dividing by zero.
     *
     * @param string $fieldName
     * @param integer $numTerms
     * @return float
     */
    public function lengthNorm($fieldName, $numTerms)
    {
        if ($numTerms == 0) {
            return 1E10;
        }

        return 1.0/sqrt($numTerms);
    }

    /**
     * Implemented as '1/sqrt(sumOfSquaredWeights)'.
     *
     * A zero sum yields the neutral factor 1.0 instead of dividing by zero.
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    public function queryNorm($sumOfSquaredWeights)
    {
        // 1.0/sqrt(0) is a division by zero: a warning with a version-dependent
        // result on older PHP, a DivisionByZeroError on PHP 8.
        if ($sumOfSquaredWeights == 0) {
            return 1.0;
        }

        return 1.0/sqrt($sumOfSquaredWeights);
    }

    /**
     * Implemented as 'sqrt(freq)'.
     *
     * @param float $freq
     * @return float
     */
    public function tf($freq)
    {
        return sqrt($freq);
    }

    /**
     * Implemented as '1/(distance + 1)'.
     *
     * @param integer $distance
     * @return float
     */
    public function sloppyFreq($distance)
    {
        return 1.0/($distance + 1);
    }

    /**
     * Implemented as 'log(numDocs/(docFreq+1)) + 1'.
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    public function idfFreq($docFreq, $numDocs)
    {
        return log($numDocs/(float)($docFreq+1)) + 1.0;
    }

    /**
     * Implemented as 'overlap/maxOverlap'.
     *
     * When maxOverlap is zero there is nothing to overlap with, so 0.0 is
     * returned instead of dividing by zero.
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    public function coord($overlap, $maxOverlap)
    {
        if ($maxOverlap == 0) {
            return 0.0;
        }

        return $overlap/(float)$maxOverlap;
    }
}
|
85
thirdparty/Zend/Search/Lucene/Search/Weight.php
vendored
Normal file
85
thirdparty/Zend/Search/Lucene/Search/Weight.php
vendored
Normal file
@ -0,0 +1,85 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Weight.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Calculate query weights and build query scorers.
 *
 * A Weight is constructed by a query, via Query->createWeight().
 * The sumOfSquaredWeights() method is then called on the top-level
 * query to compute the query normalization factor, Similarity->queryNorm(float).
 * That factor is then passed to normalize(float). At this point the weighting
 * is complete.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Search_Weight
{
    /**
     * Normalization factor.
     * This value is stored only for query explanation purposes and is not
     * used in any other place.
     *
     * @var float
     */
    protected $_queryNorm;

    /**
     * Weight value
     *
     * Weight value may be initialized in sumOfSquaredWeights() or normalize()
     * because they both are invoked either in Query::_initWeight (for top-level query) or
     * in corresponding methods of parent query's weights
     *
     * @var float
     */
    protected $_value;


    /**
     * The weight for this query.
     *
     * Returns the stored weight value as-is; it stays unset until a subclass
     * assigns it.
     *
     * @return float
     */
    public function getValue()
    {
        return $this->_value;
    }

    /**
     * The sum of squared weights of contained query clauses.
     *
     * @return float
     */
    abstract public function sumOfSquaredWeights();

    /**
     * Assigns the query normalization factor to this.
     *
     * @param float $norm query normalization factor
     */
    abstract public function normalize($norm);
}
|
||||
|
137
thirdparty/Zend/Search/Lucene/Search/Weight/Boolean.php
vendored
Normal file
137
thirdparty/Zend/Search/Lucene/Search/Weight/Boolean.php
vendored
Normal file
@ -0,0 +1,137 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Boolean.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
 * Weight of a boolean query: holds one weight per contributing subquery and
 * combines them with the boost of the boolean query itself.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Weight_Boolean extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene_Interface
     */
    private $_reader;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Queries weights
     * Array of Zend_Search_Lucene_Search_Weight, keyed like the subqueries
     *
     * @var array
     */
    private $_weights;


    /**
     * Zend_Search_Lucene_Search_Weight_Boolean constructor
     *
     * Creates a weight for every subquery, except those whose sign is set
     * (non-null) and evaluates to false.
     *
     * @param Zend_Search_Lucene_Search_Query $query the query that this concerns
     * @param Zend_Search_Lucene_Interface    $reader index reader
     */
    public function __construct(Zend_Search_Lucene_Search_Query $query,
                                Zend_Search_Lucene_Interface    $reader)
    {
        $this->_query   = $query;
        $this->_reader  = $reader;
        $this->_weights = array();

        $signs = $query->getSigns();

        foreach ($query->getSubqueries() as $position => $clause) {
            // Excluded only when a sign exists for the clause and it is false-like.
            $excluded = $signs !== null && $signs[$position] !== null && !$signs[$position];
            if ($excluded) {
                continue;
            }

            $this->_weights[$position] = $clause->createWeight($reader);
        }
    }


    /**
     * The weight for this query
     * Standard Weight::$_value is not used for boolean queries; the boost of
     * the query is returned instead.
     *
     * @return float
     */
    public function getValue()
    {
        return $this->_query->getBoost();
    }


    /**
     * The sum of squared weights of contained query clauses.
     *
     * The clause sums are added up and scaled by the squared query boost.
     * A resulting zero is replaced by 1.0.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        $total = 0;
        foreach ($this->_weights as $clauseWeight) {
            $total += $clauseWeight->sumOfSquaredWeights();
        }

        // apply the squared boost of this query
        $total *= $this->_query->getBoost() * $this->_query->getBoost();

        // empty query (like '-something -another') would otherwise give zero
        return ($total == 0) ? 1.0 : $total;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * The factor is multiplied by the query boost and handed on to every
     * contained weight.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        // incorporate boost
        $boostedNorm = $queryNorm * $this->_query->getBoost();

        foreach ($this->_weights as $clauseWeight) {
            $clauseWeight->normalize($boostedNorm);
        }
    }
}
|
||||
|
||||
|
57
thirdparty/Zend/Search/Lucene/Search/Weight/Empty.php
vendored
Normal file
57
thirdparty/Zend/Search/Lucene/Search/Weight/Empty.php
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Empty.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
 * Weight with nothing to weigh: reports a constant sum of squared weights
 * and ignores normalization.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Weight_Empty extends Zend_Search_Lucene_Search_Weight
{
    /**
     * The sum of squared weights of contained query clauses.
     *
     * Always 1, as there are no clauses to sum over.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        return 1;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * Intentionally does nothing: the factor is neither stored nor applied.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
    }
}
|
||||
|
139
thirdparty/Zend/Search/Lucene/Search/Weight/MultiTerm.php
vendored
Normal file
139
thirdparty/Zend/Search/Lucene/Search/Weight/MultiTerm.php
vendored
Normal file
@ -0,0 +1,139 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: MultiTerm.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
 * Weight of a multi-term query: holds one term weight per contributing term
 * and combines them with the boost of the query itself.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene_Interface
     */
    private $_reader;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Query terms weights
     * Array of Zend_Search_Lucene_Search_Weight_Term, keyed like the query terms
     *
     * @var array
     */
    private $_weights;


    /**
     * Zend_Search_Lucene_Search_Weight_MultiTerm constructor
     *
     * Builds a term weight for every query term, except those whose sign is
     * set (non-null) and evaluates to false, and registers each weight with
     * the query through setWeight().
     *
     * @param Zend_Search_Lucene_Search_Query $query the query that this concerns
     * @param Zend_Search_Lucene_Interface    $reader index reader
     */
    public function __construct(Zend_Search_Lucene_Search_Query $query,
                                Zend_Search_Lucene_Interface    $reader)
    {
        $this->_query   = $query;
        $this->_reader  = $reader;
        $this->_weights = array();

        $signs = $query->getSigns();

        foreach ($query->getTerms() as $termId => $queryTerm) {
            // Excluded only when a sign exists for the term and it is false-like.
            $excluded = $signs !== null && $signs[$termId] !== null && !$signs[$termId];
            if ($excluded) {
                continue;
            }

            // Loaded on demand, only once a term weight is actually needed.
            require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
            $termWeight = new Zend_Search_Lucene_Search_Weight_Term($queryTerm, $query, $reader);

            $this->_weights[$termId] = $termWeight;
            $query->setWeight($termId, $termWeight);
        }
    }


    /**
     * The weight for this query
     * Standard Weight::$_value is not used by this weight; the boost of the
     * query is returned instead.
     *
     * @return float
     */
    public function getValue()
    {
        return $this->_query->getBoost();
    }


    /**
     * The sum of squared weights of contained query clauses.
     *
     * The term sums are added up and scaled by the squared query boost.
     * A resulting zero is replaced by 1.0.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        $total = 0;
        foreach ($this->_weights as $termWeight) {
            $total += $termWeight->sumOfSquaredWeights();
        }

        // apply the squared boost of this query
        $total *= $this->_query->getBoost() * $this->_query->getBoost();

        // empty query (like '-something -another') would otherwise give zero
        return ($total == 0) ? 1.0 : $total;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * The factor is multiplied by the query boost and handed on to every
     * contained term weight.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        // incorporate boost
        $boostedNorm = $queryNorm * $this->_query->getBoost();

        foreach ($this->_weights as $termWeight) {
            $termWeight->normalize($boostedNorm);
        }
    }
}
|
||||
|
||||
|
108
thirdparty/Zend/Search/Lucene/Search/Weight/Phrase.php
vendored
Normal file
108
thirdparty/Zend/Search/Lucene/Search/Weight/Phrase.php
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Phrase.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Weight
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
 * Weight of a phrase query, derived from the idf of the phrase terms and the
 * boost of the query.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene_Interface
     */
    private $_reader;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query_Phrase
     */
    private $_query;

    /**
     * Score factor
     *
     * @var float
     */
    private $_idf;

    /**
     * Query weight
     *
     * Declared explicitly so that sumOfSquaredWeights() and normalize() do
     * not create it as a dynamic property.
     *
     * @var float
     */
    private $_queryWeight;

    /**
     * Zend_Search_Lucene_Search_Weight_Phrase constructor
     *
     * @param Zend_Search_Lucene_Search_Query_Phrase $query the query that this concerns
     * @param Zend_Search_Lucene_Interface           $reader index reader
     */
    public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query,
                                Zend_Search_Lucene_Interface           $reader)
    {
        $this->_query  = $query;
        $this->_reader = $reader;
    }

    /**
     * The sum of squared weights of contained query clauses.
     *
     * Stores the idf of the phrase terms and the query weight (idf times
     * query boost) on the object, then returns the squared query weight.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        // compute idf
        $this->_idf = $this->_reader->getSimilarity()->idf($this->_query->getTerms(), $this->_reader);

        // compute query weight
        $this->_queryWeight = $this->_idf * $this->_query->getBoost();

        // square it
        return $this->_queryWeight * $this->_queryWeight;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * Uses the idf and query weight stored by sumOfSquaredWeights().
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        $this->_queryNorm = $queryNorm;

        // normalize query weight
        $this->_queryWeight *= $queryNorm;

        // idf for documents
        $this->_value = $this->_queryWeight * $this->_idf;
    }
}
|
||||
|
||||
|
125
thirdparty/Zend/Search/Lucene/Search/Weight/Term.php
vendored
Normal file
125
thirdparty/Zend/Search/Lucene/Search/Weight/Term.php
vendored
Normal file
@ -0,0 +1,125 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
 * Weight of a single query term, derived from the idf of the term and the
 * boost of the query it belongs to.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene_Interface
     */
    private $_reader;

    /**
     * Term
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Score factor
     *
     * @var float
     */
    private $_idf;

    /**
     * Query weight
     *
     * @var float
     */
    private $_queryWeight;


    /**
     * Zend_Search_Lucene_Search_Weight_Term constructor
     *
     * @param Zend_Search_Lucene_Index_Term   $term the term being weighted
     * @param Zend_Search_Lucene_Search_Query $query the query that this concerns
     * @param Zend_Search_Lucene_Interface    $reader index reader
     */
    public function __construct(Zend_Search_Lucene_Index_Term   $term,
                                Zend_Search_Lucene_Search_Query $query,
                                Zend_Search_Lucene_Interface    $reader)
    {
        $this->_term   = $term;
        $this->_query  = $query;
        $this->_reader = $reader;
    }


    /**
     * The sum of squared weights of contained query clauses.
     *
     * Stores the idf of the term and the query weight (idf times query
     * boost) on the object, then returns the squared query weight.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        // idf of the term, as computed by the reader's similarity
        $similarity = $this->_reader->getSimilarity();
        $this->_idf = $similarity->idf($this->_term, $this->_reader);

        // query weight: idf scaled by the boost of the query
        $this->_queryWeight = $this->_idf * $this->_query->getBoost();

        // squared query weight
        return $this->_queryWeight * $this->_queryWeight;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * Uses the idf and query weight stored by sumOfSquaredWeights().
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        $this->_queryNorm = $queryNorm;

        // scale the query weight by the normalization factor
        $this->_queryWeight = $this->_queryWeight * $queryNorm;

        // final value: normalized query weight times idf for documents
        $this->_value = $this->_queryWeight * $this->_idf;
    }
}
|
||||
|
136
thirdparty/Zend/Search/Lucene/Storage/Directory.php
vendored
Normal file
136
thirdparty/Zend/Search/Lucene/Storage/Directory.php
vendored
Normal file
@ -0,0 +1,136 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Directory.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Abstract storage directory: the set of file operations a concrete storage
 * backend has to provide.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Storage_Directory
{

    /**
     * Closes the store.
     *
     * @return void
     */
    abstract public function close();

    /**
     * Returns an array of strings, one for each file in the directory.
     *
     * @return array
     */
    abstract public function fileList();

    /**
     * Creates a new, empty file in the directory with the given $filename.
     *
     * @param string $filename name of the file to create
     * @return Zend_Search_Lucene_Storage_File
     */
    abstract public function createFile($filename);


    /**
     * Removes an existing $filename in the directory.
     *
     * @param string $filename name of the file to remove
     * @return void
     */
    abstract public function deleteFile($filename);

    /**
     * Purges a file if it is cached by the directory object.
     *
     * This method is used to prevent the 'too many open files' error.
     *
     * @param string $filename name of the file to purge
     * @return void
     */
    abstract public function purgeFile($filename);

    /**
     * Returns true if a file with the given $filename exists.
     *
     * @param string $filename name of the file to look for
     * @return boolean
     */
    abstract public function fileExists($filename);


    /**
     * Returns the length of a $filename in the directory.
     *
     * @param string $filename name of the file to measure
     * @return integer
     */
    abstract public function fileLength($filename);


    /**
     * Returns the UNIX timestamp $filename was last modified.
     *
     * @param string $filename name of the file to inspect
     * @return integer
     */
    abstract public function fileModified($filename);


    /**
     * Renames an existing file in the directory.
     *
     * @param string $from current file name
     * @param string $to new file name
     * @return void
     */
    abstract public function renameFile($from, $to);


    /**
     * Sets the modified time of $filename to now.
     *
     * @param string $filename name of the file to touch
     * @return void
     */
    abstract public function touchFile($filename);


    /**
     * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
     *
     * If the $shareHandler option is true, the file handler can be shared
     * between File Object requests. Sharing speeds things up, but causes
     * problems with the file position.
     * Shared handlers are good for short atomic requests.
     * Non-shared handlers are useful for stream file reading (especially for
     * compound files).
     *
     * @param string $filename name of the file to open
     * @param boolean $shareHandler whether the file handler may be shared
     * @return Zend_Search_Lucene_Storage_File
     */
    abstract public function getFileObject($filename, $shareHandler = true);

}
|
||||
|
362
thirdparty/Zend/Search/Lucene/Storage/Directory/Filesystem.php
vendored
Normal file
362
thirdparty/Zend/Search/Lucene/Storage/Directory/Filesystem.php
vendored
Normal file
@ -0,0 +1,362 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Filesystem.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Storage_Directory */
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory.php';
|
||||
|
||||
|
||||
/**
|
||||
* FileSystem implementation of Directory abstraction.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene_Storage_Directory
|
||||
{
|
||||
/**
|
||||
* Filesystem path to the directory
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $_dirPath = null;
|
||||
|
||||
/**
|
||||
* Cache for Zend_Search_Lucene_Storage_File_Filesystem objects
|
||||
* Array: filename => Zend_Search_Lucene_Storage_File object
|
||||
*
|
||||
* @var array
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
protected $_fileHandlers;
|
||||
|
||||
/**
|
||||
* Default file permissions
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
protected static $_defaultFilePermissions = 0666;
|
||||
|
||||
|
||||
/**
 * Reports the permission mode that createFile() applies to new files.
 *
 * @return integer
 */
public static function getDefaultFilePermissions()
{
    $currentMode = self::$_defaultFilePermissions;

    return $currentMode;
}
|
||||
|
||||
/**
 * Replaces the permission mode that createFile() applies to new files.
 *
 * The value is stored in a static property, so it is shared by every
 * instance of this class.
 *
 * @param integer $mode
 */
public static function setDefaultFilePermissions($mode)
{
    self::$_defaultFilePermissions = $mode;
}
|
||||
|
||||
|
||||
/**
 * Creates $dir, first creating any parent directories that are missing.
 *
 * Returns false for a null or empty $dir, and true when $dir already is a
 * directory or is the root path '/'.
 *
 * @param string  $dir
 * @param integer $mode      mode passed to mkdir() for every directory created
 * @param boolean $recursive only forwarded to the nested calls; parents are
 *                           created regardless of its value
 * @return boolean true if the directory exists when the call returns
 */
public static function mkdirs($dir, $mode = 0777, $recursive = true)
{
    if ($dir === null || $dir === '') {
        return false;
    }

    if (is_dir($dir) || $dir === '/') {
        return true;
    }

    if (!self::mkdirs(dirname($dir), $mode, $recursive)) {
        return false;
    }

    // Another process may create the directory between the is_dir() check
    // above and this call. mkdir() then fails although the goal is reached,
    // so a failed mkdir() is re-checked with is_dir() before reporting failure.
    return @mkdir($dir, $mode) || is_dir($dir);
}
|
||||
|
||||
|
||||
/**
 * Object constructor.
 *
 * Accepts an existing directory as-is; otherwise tries to create $path
 * (including parents) through mkdirs().
 *
 * @param string $path
 * @throws Zend_Search_Lucene_Exception if $path exists but is not a directory,
 *                                      or if it cannot be created
 */
public function __construct($path)
{
    $isDirectory = is_dir($path);

    if (!$isDirectory && file_exists($path)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Path exists, but it\'s not a directory');
    }

    if (!$isDirectory && !self::mkdirs($path)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Can't create directory '$path'.");
    }

    $this->_fileHandlers = array();
    $this->_dirPath      = $path;
}
|
||||
|
||||
|
||||
/**
 * Closes the store.
 *
 * Every cached file object is closed and the cache is emptied.
 *
 * @return void
 */
public function close()
{
    foreach (array_keys($this->_fileHandlers) as $cachedName) {
        $this->_fileHandlers[$cachedName]->close();
    }

    $this->_fileHandlers = array();
}
|
||||
|
||||
|
||||
/**
 * Returns an array of strings, one for each file in the directory.
 *
 * Sub-directories and the '.' / '..' entries are left out.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception if the directory cannot be opened
 */
public function fileList()
{
    // opendir() returns false when the directory is missing or unreadable.
    // Passing that on to readdir()/closedir() is an error, so fail explicitly.
    $dirContent = @opendir($this->_dirPath);
    if ($dirContent === false) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Can't open directory '{$this->_dirPath}' for reading.");
    }

    $result = array();
    while (($file = readdir($dirContent)) !== false) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        if (!is_dir($this->_dirPath . '/' . $file)) {
            $result[] = $file;
        }
    }
    closedir($dirContent);

    return $result;
}
|
||||
|
||||
/**
 * Creates a new, empty file in the directory with the given $filename.
 *
 * A cached file object for the same name is closed and dropped first; the
 * new object (opened in 'w+b' mode) takes its place in the cache.
 *
 * @param string $filename
 * @return Zend_Search_Lucene_Storage_File
 * @throws Zend_Search_Lucene_Exception
 */
public function createFile($filename)
{
    if (isset($this->_fileHandlers[$filename])) {
        $this->_fileHandlers[$filename]->close();
    }
    unset($this->_fileHandlers[$filename]);

    require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
    $fullPath   = $this->_dirPath . '/' . $filename;
    $fileObject = new Zend_Search_Lucene_Storage_File_Filesystem($fullPath, 'w+b');
    $this->_fileHandlers[$filename] = $fileObject;

    // Best effort only: the file may already have been created by another
    // user, who is then responsible for its permissions.
    @chmod($fullPath, self::$_defaultFilePermissions);

    return $fileObject;
}
|
||||
|
||||
|
||||
/**
 * Removes an existing $filename in the directory.
 *
 * A cached file object for the name is closed and dropped before the file
 * is unlinked.
 *
 * @param string $filename
 * @return void
 * @throws Zend_Search_Lucene_Exception if the file cannot be deleted
 */
public function deleteFile($filename)
{
    if (isset($this->_fileHandlers[$filename])) {
        $this->_fileHandlers[$filename]->close();
    }
    unset($this->_fileHandlers[$filename]);

    if (!@unlink($this->_dirPath . '/' . $filename)) {
        // error_get_last() also reports errors silenced with '@'. It replaces
        // the track_errors / $php_errormsg mechanism, which newer PHP
        // versions no longer provide.
        $lastError = error_get_last();
        $reason    = ($lastError !== null) ? $lastError['message'] : 'unknown error';

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Can\'t delete file: ' . $reason);
    }
}
|
||||
|
||||
/**
 * Purge file if it's cached by directory object.
 *
 * Closes the cached file object (if any) and removes it from the cache.
 * Method is used to prevent 'too many open files' error.
 *
 * @param string $filename
 * @return void
 */
public function purgeFile($filename)
{
    $cached = isset($this->_fileHandlers[$filename]) ? $this->_fileHandlers[$filename] : null;

    if ($cached !== null) {
        $cached->close();
    }

    unset($this->_fileHandlers[$filename]);
}
|
||||
|
||||
|
||||
/**
 * Returns true if a file with the given $filename exists.
 *
 * A name with a cached file object counts as existing without touching
 * the filesystem.
 *
 * @param string $filename
 * @return boolean
 */
public function fileExists($filename)
{
    if (isset($this->_fileHandlers[$filename])) {
        return true;
    }

    return file_exists($this->_dirPath . '/' . $filename);
}
|
||||
|
||||
|
||||
/**
 * Returns the length of a $filename in the directory.
 *
 * Uses the cached file object's size() when one exists, otherwise
 * filesize() on the path.
 *
 * @param string $filename
 * @return integer
 */
public function fileLength($filename)
{
    if (!isset($this->_fileHandlers[$filename])) {
        return filesize($this->_dirPath .'/'. $filename);
    }

    return $this->_fileHandlers[$filename]->size();
}
|
||||
|
||||
|
||||
/**
 * Returns the UNIX timestamp $filename was last modified.
 *
 * @param string $filename
 * @return integer
 */
public function fileModified($filename)
{
    $fullPath = $this->_dirPath .'/'. $filename;

    return filemtime($fullPath);
}
|
||||
|
||||
|
||||
/**
 * Renames an existing file in the directory.
 *
 * Cached file objects for both names are closed and dropped first. An
 * existing file at the target name is deleted before the rename.
 *
 * @param string $from
 * @param string $to
 * @return boolean true on success (failures are reported by exception)
 * @throws Zend_Search_Lucene_Exception if the existing target cannot be
 *                                      deleted or the rename fails
 */
public function renameFile($from, $to)
{
    foreach (array($from, $to) as $name) {
        if (isset($this->_fileHandlers[$name])) {
            $this->_fileHandlers[$name]->close();
        }
        unset($this->_fileHandlers[$name]);
    }

    $sourcePath = $this->_dirPath . '/' . $from;
    $targetPath = $this->_dirPath . '/' . $to;

    if (file_exists($targetPath) && !unlink($targetPath)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Delete operation failed');
    }

    if (!@rename($sourcePath, $targetPath)) {
        // error_get_last() also reports errors silenced with '@'. It replaces
        // the track_errors / $php_errormsg mechanism, which newer PHP
        // versions no longer provide.
        $lastError = error_get_last();
        $reason    = ($lastError !== null)
                   ? $lastError['message']
                   : "Can't rename file '$from' to '$to'.";

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($reason);
    }

    return true;
}
|
||||
|
||||
|
||||
/**
 * Sets the modified time of $filename to now.
 *
 * @param string $filename
 * @return boolean result of touch()
 */
public function touchFile($filename)
{
    $fullPath = $this->_dirPath .'/'. $filename;

    return touch($fullPath);
}
|
||||
|
||||
|
||||
/**
 * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
 *
 * With $shareHandler set, one file object per name is cached and handed out
 * again on later requests, rewound to position 0. Sharing is faster but
 * callers share the file position, so it suits short atomic requests.
 * Without $shareHandler a fresh, uncached object is returned, which is
 * useful for stream reading (especially of compound files).
 *
 * @param string $filename
 * @param boolean $shareHandler
 * @return Zend_Search_Lucene_Storage_File
 */
public function getFileObject($filename, $shareHandler = true)
{
    require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
    $fullFilename = $this->_dirPath . '/' . $filename;

    if ($shareHandler) {
        if (!isset($this->_fileHandlers[$filename])) {
            $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
            return $this->_fileHandlers[$filename];
        }

        $this->_fileHandlers[$filename]->seek(0);
        return $this->_fileHandlers[$filename];
    }

    return new Zend_Search_Lucene_Storage_File_Filesystem($fullFilename);
}
|
||||
}
|
||||
|
473
thirdparty/Zend/Search/Lucene/Storage/File.php
vendored
Normal file
473
thirdparty/Zend/Search/Lucene/Storage/File.php
vendored
Normal file
@ -0,0 +1,473 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: File.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Storage_File
|
||||
{
|
||||
/**
|
||||
* Reads $length number of bytes at the current position in the
|
||||
* file and advances the file pointer.
|
||||
*
|
||||
* @param integer $length
|
||||
* @return string
|
||||
*/
|
||||
abstract protected function _fread($length=1);
|
||||
|
||||
|
||||
/**
|
||||
* Sets the file position indicator and advances the file pointer.
|
||||
* The new position, measured in bytes from the beginning of the file,
|
||||
* is obtained by adding offset to the position specified by whence,
|
||||
* whose values are defined as follows:
|
||||
* SEEK_SET - Set position equal to offset bytes.
|
||||
* SEEK_CUR - Set position to current location plus offset.
|
||||
* SEEK_END - Set position to end-of-file plus offset. (To move to
|
||||
* a position before the end-of-file, you need to pass a negative value
|
||||
* in offset.)
|
||||
* Upon success, returns 0; otherwise, returns -1
|
||||
*
|
||||
* @param integer $offset
|
||||
* @param integer $whence
|
||||
* @return integer
|
||||
*/
|
||||
abstract public function seek($offset, $whence=SEEK_SET);
|
||||
|
||||
/**
|
||||
* Get file position.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
abstract public function tell();
|
||||
|
||||
/**
|
||||
* Flush output.
|
||||
*
|
||||
* Returns true on success or false on failure.
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
abstract public function flush();
|
||||
|
||||
/**
|
||||
* Writes $length number of bytes (all, if $length===null) to the end
|
||||
* of the file.
|
||||
*
|
||||
* @param string $data
|
||||
* @param integer $length
|
||||
*/
|
||||
abstract protected function _fwrite($data, $length=null);
|
||||
|
||||
/**
|
||||
* Lock file
|
||||
*
|
||||
* Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
|
||||
*
|
||||
* @param integer $lockType
|
||||
* @return boolean
|
||||
*/
|
||||
abstract public function lock($lockType, $nonBlockinLock = false);
|
||||
|
||||
/**
|
||||
* Unlock file
|
||||
*/
|
||||
abstract public function unlock();
|
||||
|
||||
/**
 * Reads one byte at the current position and advances the file pointer.
 *
 * @return integer the byte's ordinal value
 */
public function readByte()
{
    $char = $this->_fread(1);

    return ord($char);
}
|
||||
|
||||
/**
 * Writes a byte to the end of the file.
 *
 * @param integer $byte ordinal value, converted with chr()
 * @return mixed whatever _fwrite() returns
 */
public function writeByte($byte)
{
    $char = chr($byte);

    return $this->_fwrite($char, 1);
}
|
||||
|
||||
/**
 * Reads $num bytes from the current position in the file
 * and advances the file pointer.
 *
 * @param integer $num
 * @return string
 */
public function readBytes($num)
{
    $bytes = $this->_fread($num);

    return $bytes;
}
|
||||
|
||||
/**
 * Writes $num bytes of $data (all of it, if $num === null) to the end
 * of the file.
 *
 * @param string $data
 * @param integer $num
 */
public function writeBytes($data, $num=null)
{
    $this->_fwrite($data, $num);
}
|
||||
|
||||
|
||||
/**
 * Reads a four-byte integer (most significant byte first) from the current
 * position in the file and advances the file pointer.
 *
 * @return integer
 */
public function readInt()
{
    $bytes = $this->_fread(4);

    $byte0 = ord($bytes[0]) << 24;
    $byte1 = ord($bytes[1]) << 16;
    $byte2 = ord($bytes[2]) << 8;
    $byte3 = ord($bytes[3]);

    return $byte0 | $byte1 | $byte2 | $byte3;
}
|
||||
|
||||
|
||||
/**
 * Writes an integer to the end of file as four bytes, most significant
 * byte first.
 *
 * @param integer $value
 */
public function writeInt($value)
{
    $value = (int) $value;

    $packed = '';
    foreach (array(24, 16, 8, 0) as $shift) {
        $packed .= chr(($value >> $shift) & 0xFF);
    }

    $this->_fwrite($packed, 4);
}
|
||||
|
||||
|
||||
/**
 * Returns a long integer from the current position in the file
 * and advances the file pointer.
 *
 * When PHP integers are wider than four bytes the eight bytes are combined
 * directly (most significant byte first); otherwise the work is delegated
 * to readLong32Bit().
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception
 */
public function readLong()
{
    // fseek() uses long for offset, so on 32-bit builds the largest index
    // segment file size is 2Gb and a dedicated code path is used.
    if (PHP_INT_SIZE <= 4) {
        return $this->readLong32Bit();
    }

    $bytes = $this->_fread(8);

    $value = 0;
    for ($index = 0; $index < 8; $index++) {
        $value = ($value << 8) | ord($bytes[$index]);
    }

    return $value;
}
|
||||
|
||||
/**
 * Writes long integer to the end of file.
 *
 * When PHP integers are wider than four bytes the value is written as
 * eight bytes, most significant byte first; otherwise the work is
 * delegated to writeLong32Bit().
 *
 * @param integer $value
 * @throws Zend_Search_Lucene_Exception
 */
public function writeLong($value)
{
    // fseek() and ftell() use long for offset, so on 32-bit builds the
    // largest index segment file size is 2Gb and a dedicated path is used.
    if (PHP_INT_SIZE <= 4) {
        $this->writeLong32Bit($value);
        return;
    }

    $value = (int) $value;

    $packed = '';
    for ($shift = 56; $shift >= 0; $shift -= 8) {
        $packed .= chr(($value >> $shift) & 0xFF);
    }

    $this->_fwrite($packed, 8);
}
|
||||
|
||||
|
||||
/**
 * Returns a long integer from the current position in the file,
 * advances the file pointer and returns it as float where needed
 * (for 32-bit platforms).
 *
 * The value is read as two readInt() words, high word first.
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception
 */
public function readLong32Bit()
{
    $highWord = $this->readInt();
    $lowWord  = $this->readInt();

    if ($highWord & (int)0x80000000) {
        // It's a negative value since the highest bit is set
        if ($highWord == (int)0xFFFFFFFF  &&  ($lowWord & (int)0x80000000)) {
            return $lowWord;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
    }

    if ($lowWord < 0) {
        // Value is larger than 0x7FFF FFFF. Represent low word as float.
        $lowWord &= 0x7FFFFFFF;
        $lowWord += (float)0x80000000;
    }

    if ($highWord == 0) {
        // Return value as integer if possible
        return $lowWord;
    }

    return $highWord*(float)0x100000000/* 0x00000001 00000000 */ + $lowWord;
}
|
||||
|
||||
|
||||
/**
 * Writes long integer to the end of file (32-bit platforms implementation).
 *
 * The value is split into a high and a low word, each written with
 * writeInt(), high word first.
 *
 * @param integer|float $value
 * @throws Zend_Search_Lucene_Exception
 */
public function writeLong32Bit($value)
{
    if ($value < (int)0x80000000) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
    }

    if ($value < 0) {
        $highWord = (int)0xFFFFFFFF;
        $lowWord  = (int)$value;
    } else {
        $wordRange = (float)0x100000000/* 0x00000001 00000000 */;

        $highWord = (int)($value/$wordRange);
        $lowWord  = $value - $highWord*$wordRange;

        if ($lowWord > 0x7FFFFFFF) {
            // Highest bit of low word is set. Translate it to the corresponding negative integer value
            $lowWord -= 0x80000000;
            $lowWord |= 0x80000000;
        }
    }

    $this->writeInt($highWord);
    $this->writeInt($lowWord);
}
|
||||
|
||||
|
||||
/**
 * Returns a variable-length integer from the current
 * position in the file and advances the file pointer.
 *
 * Each byte contributes its low seven bits, least significant group
 * first; a set high bit means another byte follows.
 *
 * @return integer
 */
public function readVInt()
{
    $byte   = ord($this->_fread(1));
    $result = $byte & 0x7F;
    $shift  = 7;

    while (($byte & 0x80) != 0) {
        $byte    = ord($this->_fread(1));
        $result |= ($byte & 0x7F) << $shift;
        $shift  += 7;
    }

    return $result;
}
|
||||
|
||||
/**
 * Writes a variable-length integer to the end of file.
 *
 * Seven bits are emitted per byte, least significant group first; every
 * byte except the last has its high bit set.
 *
 * @param integer $value
 */
public function writeVInt($value)
{
    $remaining = (int) $value;

    while ($remaining > 0x7F) {
        $this->_fwrite(chr( ($remaining & 0x7F)|0x80 ));
        $remaining >>= 7;
    }

    $this->_fwrite(chr($remaining));
}
|
||||
|
||||
|
||||
/**
 * Reads a string from the current position in the file
 * and advances the file pointer.
 *
 * The stored form is a VInt character count followed by the characters.
 * One byte per character is read up front; for every multi-byte lead byte
 * found, the missing continuation bytes are read in addition. The two-byte
 * sequence 0xC0 0x80 (how Java 2 encodes the null character) is decoded to
 * a single "\x00" byte.
 *
 * This implementation supports only Basic Multilingual Plane (BMP)
 * characters (from 0x0000 to 0xFFFF) and doesn't support "supplementary
 * characters" (code points greater than 0xFFFF). Java 2 represents these
 * as a pair of 16-bit char values (high surrogate 0xD800-0xDBFF, low
 * surrogate 0xDC00-0xDFFF) which are then encoded as usual UTF-8 characters
 * in six bytes, while standard UTF-8 uses four bytes for them.
 *
 * @return string
 */
public function readString()
{
    $strlen = $this->readVInt();
    if ($strlen == 0) {
        return '';
    }

    $str_val = $this->_fread($strlen);

    for ($count = 0; $count < $strlen; $count++) {
        $leadByte = ord($str_val[$count]);
        if (($leadByte & 0xC0) != 0xC0) {
            continue;
        }

        $addBytes = 1;
        if ($leadByte & 0x20) {
            $addBytes++;

            // Never used. Java2 doesn't encode strings in four bytes
            if ($leadByte & 0x10) {
                $addBytes++;
            }
        }
        $str_val .= $this->_fread($addBytes);
        $strlen  += $addBytes;

        if ($leadByte == 0xC0 && ord($str_val[$count+1]) == 0x80) {
            // Java2 null character: collapse the two bytes into one real
            // "\x00". The string is now one byte shorter, so the expected
            // length shrinks as well, and $count stays on the decoded byte
            // so the following character is not skipped.
            $str_val = substr($str_val, 0, $count) . "\x00" . substr($str_val, $count + 2);
            $strlen--;
        } else {
            // Step over the continuation bytes of this character
            $count += $addBytes;
        }
    }

    return $str_val;
}

/**
 * Writes a string to the end of file.
 *
 * Emits the character count as a VInt, then the bytes of $str with every
 * "\x00" replaced by the two-byte sequence 0xC0 0x80 that readString()
 * decodes again.
 *
 * The input is taken to be in Java 2 representation already, so only the
 * character count is computed here. The same BMP-only limitation as in
 * readString() applies.
 *
 * @param string $str
 * @throws Zend_Search_Lucene_Exception if the byte sequence implies a
 *                                      negative character count
 */
public function writeString($str)
{
    // convert input to a string before iterating string characters
    settype($str, 'string');

    $chars = $strlen = strlen($str);
    $containNullChars = false;

    for ($count = 0; $count < $strlen; $count++) {
        $byte = ord($str[$count]);

        if (($byte & 0xC0) == 0xC0) {
            $addBytes = 1;
            if ($byte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($byte & 0x10) {
                    $addBytes++;
                }
            }
            // Continuation bytes do not count as characters
            $chars -= $addBytes;
            $count += $addBytes;
        } elseif ($byte == 0) {
            // A null byte is never a multi-byte lead byte, so it has to be
            // detected outside of the branch above.
            $containNullChars = true;
        }
    }

    if ($chars < 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
    }

    $this->writeVInt($chars);
    if ($containNullChars) {
        // str_replace() takes (search, replace, subject)
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    } else {
        $this->_fwrite($str);
    }
}
|
||||
|
||||
|
||||
/**
 * Reads binary data from the current position in the file
 * and advances the file pointer.
 *
 * The data is preceded by its length, stored as a VInt.
 *
 * @return string
 */
public function readBinary()
{
    $length = $this->readVInt();

    return $this->_fread($length);
}
|
||||
}
|
220
thirdparty/Zend/Search/Lucene/Storage/File/Filesystem.php
vendored
Normal file
220
thirdparty/Zend/Search/Lucene/Storage/File/Filesystem.php
vendored
Normal file
@ -0,0 +1,220 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Filesystem.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Storage_File
|
||||
{
|
||||
/**
|
||||
* Resource of the open file
|
||||
*
|
||||
* @var resource
|
||||
*/
|
||||
protected $_fileHandle;
|
||||
|
||||
|
||||
/**
 * Class constructor. Open the file.
 *
 * When $mode does not contain 'w' the file must already be readable.
 *
 * @param string $filename
 * @param string $mode fopen() mode
 * @throws Zend_Search_Lucene_Exception if the file is not readable or
 *                                      cannot be opened
 */
public function __construct($filename, $mode='r+b')
{
    if (strpos($mode, 'w') === false  &&  !is_readable($filename)) {
        // opening for reading non-readable file
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('File \'' . $filename . '\' is not readable.');
    }

    $this->_fileHandle = @fopen($filename, $mode);

    if ($this->_fileHandle === false) {
        // error_get_last() also reports errors silenced with '@'. It replaces
        // the track_errors / $php_errormsg mechanism, which newer PHP
        // versions no longer provide.
        $lastError = error_get_last();
        $reason    = ($lastError !== null)
                   ? $lastError['message']
                   : "Can't open file '$filename'.";

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($reason);
    }
}
|
||||
|
||||
/**
 * Sets the file position indicator and advances the file pointer.
 *
 * The new position, measured in bytes from the beginning of the file,
 * is obtained by adding $offset to the position specified by $whence:
 *  SEEK_SET - Set position equal to offset bytes.
 *  SEEK_CUR - Set position to current location plus offset.
 *  SEEK_END - Set position to end-of-file plus offset. (To move to
 *             a position before the end-of-file, pass a negative offset.)
 * SEEK_CUR is the only supported offset type for compound files.
 *
 * Upon success, returns 0; otherwise, returns -1
 *
 * @param integer $offset
 * @param integer $whence
 * @return integer
 */
public function seek($offset, $whence=SEEK_SET)
{
    $status = fseek($this->_fileHandle, $offset, $whence);

    return $status;
}
|
||||
|
||||
|
||||
/**
 * Get file position.
 *
 * @return integer result of ftell() on the open handle
 */
public function tell()
{
    $position = ftell($this->_fileHandle);

    return $position;
}
|
||||
|
||||
/**
 * Flush output.
 *
 * Returns true on success or false on failure.
 *
 * @return boolean
 */
public function flush()
{
    $flushed = fflush($this->_fileHandle);

    return $flushed;
}
|
||||
|
||||
/**
 * Close File object.
 *
 * Does nothing when the handle has already been released; otherwise the
 * handle is closed and reset to null.
 */
public function close()
{
    if ($this->_fileHandle === null) {
        return;
    }

    @fclose($this->_fileHandle);
    $this->_fileHandle = null;
}
|
||||
|
||||
/**
 * Get the size of the already opened file.
 *
 * Seeks to the end of the file to measure it and then restores the
 * position the handle had before the call.
 *
 * @return integer
 */
public function size()
{
    $handle        = $this->_fileHandle;
    $savedPosition = ftell($handle);

    fseek($handle, 0, SEEK_END);
    $size = ftell($handle);

    fseek($handle, $savedPosition);

    return $size;
}
|
||||
|
||||
/**
 * Read $length bytes from the file and advance the file pointer.
 *
 * Requests shorter than 1024 bytes are served by a single fread() call and
 * return its result directly. Longer requests are read in a loop until
 * $length bytes have been collected or fread() yields no more data.
 *
 * @param integer $length
 * @return string
 */
protected function _fread($length=1)
{
    if ($length == 0) {
        return '';
    }

    if ($length < 1024) {
        return fread($this->_fileHandle, $length);
    }

    $data = '';
    while ($length > 0) {
        $nextBlock = fread($this->_fileHandle, $length);

        // Stop on a read error (false) or end of data (''). The checks are
        // strict on purpose: a block consisting of the single character "0"
        // is loosely equal to false and must not end the loop.
        if ($nextBlock === false || $nextBlock === '') {
            break;
        }

        $data   .= $nextBlock;
        $length -= strlen($nextBlock);
    }

    return $data;
}
|
||||
|
||||
|
||||
/**
 * Writes $length number of bytes (all, if $length===null) to the end
 * of the file.
 *
 * @param string $data
 * @param integer $length
 */
protected function _fwrite($data, $length=null)
{
    if ($length !== null) {
        fwrite($this->_fileHandle, $data, $length);
        return;
    }

    fwrite($this->_fileHandle, $data);
}
|
||||
|
||||
/**
 * Lock file.
 *
 * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
 * With $nonBlockingLock set, LOCK_NB is added to the lock type.
 *
 * @param integer $lockType
 * @param boolean $nonBlockingLock
 * @return boolean
 */
public function lock($lockType, $nonBlockingLock = false)
{
    $operation = $nonBlockingLock ? ($lockType | LOCK_NB) : $lockType;

    return flock($this->_fileHandle, $operation);
}
|
||||
|
||||
/**
 * Unlock file.
 *
 * Returns true on success; an already released handle (null) is
 * reported as success without calling flock().
 *
 * @return boolean
 */
public function unlock()
{
    if ($this->_fileHandle === null) {
        return true;
    }

    return flock($this->_fileHandle, LOCK_UN);
}
|
||||
}
|
||||
|
601
thirdparty/Zend/Search/Lucene/Storage/File/Memory.php
vendored
Normal file
601
thirdparty/Zend/Search/Lucene/Storage/File/Memory.php
vendored
Normal file
@ -0,0 +1,601 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: Memory.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
{
    /**
     * Complete contents of the in-memory "file"
     *
     * @var string
     */
    private $_data;

    /**
     * Current position, as a byte offset into $_data
     *
     * @var integer
     */
    private $_position = 0;


    /**
     * Object constructor
     *
     * @param string $data initial contents of the in-memory file
     */
    public function __construct($data)
    {
        $this->_data = $data;
    }

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * The position is advanced by $length even if fewer bytes were available.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length = 1)
    {
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }


    /**
     * Sets the file position indicator.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise (unknown $whence), returns -1
     * and leaves the position untouched.
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence=SEEK_SET)
    {
        switch ($whence) {
            case SEEK_SET:
                $this->_position = $offset;
                break;

            case SEEK_CUR:
                $this->_position += $offset;
                break;

            case SEEK_END:
                $this->_position = strlen($this->_data) + $offset;
                break;

            default:
                // Unknown $whence value: nothing was moved, report failure
                return -1;
        }

        return 0;
    }

    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return $this->_position;
    }

    /**
     * Flush output.
     *
     * Data lives in a PHP string, so there is nothing to flush.
     * Always returns true.
     *
     * @return boolean
     */
    public function flush()
    {
        // Do nothing

        return true;
    }

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($length !== null) {
            $this->_data .= substr($data, 0, $length);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
     * Both arguments are ignored: memory files can't be shared, so the
     * call always succeeds.
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock
     * @return boolean
     */
    public function lock($lockType, $nonBlockinLock = false)
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Unlock file
     *
     * Always succeeds, because lock() never takes a real lock.
     *
     * @return boolean
     */
    public function unlock()
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_data[$this->_position++]);
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return integer always 1
     */
    public function writeByte($byte)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        $this->_data .= chr($byte);
        $this->_position = strlen($this->_data);

        return 1;
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        $returnValue = substr($this->_data, $this->_position, $num);
        $this->_position += $num;

        return $returnValue;
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($num !== null) {
            $this->_data .= substr($data, 0, $num);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads an integer (four bytes, most significant byte first) from the
     * current position in the file and advances the file pointer.
     *
     * The bytes are combined with shifts and ORs only; no sign extension
     * is applied.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = substr($this->_data, $this->_position, 4);
        $this->_position += 4;

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer (four bytes, most significant byte first)
     * to the end of file.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        $this->_data .= chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF);

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer (eight bytes, most significant byte first)
     * from the current position in the file and advances the file pointer.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = substr($this->_data, $this->_position, 8);
            $this->_position += 8;

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->readLong32Bit();
        }
    }

    /**
     * Writes long integer (eight bytes, most significant byte first)
     * to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_data .= chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF);
        } else {
            $this->writeLong32Bit($value);
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * The value is read as two consecutive 4-byte words (high, then low).
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            } else {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
            }

        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is written as two consecutive 4-byte words (high, then low).
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries seven value bits (least significant group first);
     * a set high bit means another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_data[$this->_position++]);
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_data[$this->_position++]);
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_data .= chr( ($value & 0x7F)|0x80 );
            $value >>= 7;
        }
        $this->_data .= chr($value);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so the byte
     * length is discovered while scanning. The two-byte sequence
     * \xC0\x80 (Java 2 encoding of the null character) is decoded to a
     * single \x00 byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // Start by taking one byte per character; continuation bytes are
        // pulled in below as multi-byte lead bytes are discovered.
        $str_val = substr($this->_data, $this->_position, $strlen);
        $this->_position += $strlen;

        for ($count = 0; $count < $strlen; $count++ ) {
            $leadByte = ord($str_val[$count]);
            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= substr($this->_data, $this->_position, $addBytes);
            $this->_position += $addBytes;
            $strlen += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if (substr($str_val, $count, 2) === "\xC0\x80") {
                // Collapse the pair into one real NUL byte. The string is now
                // one byte shorter and there is no continuation byte left to
                // skip, so shrink $strlen and leave $count on the NUL.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count + 2);
                $strlen--;
            } else {
                // Skip the continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * A character-count prefix is written first (as a VInt), followed by
     * the string bytes with every \x00 replaced by \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        /**
         * String is already in Java 2 representation.
         * We should only calculate actual string length and replace
         * \x00 by \xC0\x80
         */
        for ($count = 0; $count < $strlen; $count++ ) {
            $leadByte = ord($str[$count]);
            if (($leadByte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($leadByte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($leadByte & 0x10) {
                        $addBytes++;
                    }
                }

                // Continuation bytes do not count as characters
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // str_replace() takes (search, replace, subject). It leaves the
        // string unchanged when it contains no NUL byte, so no separate
        // "contains null" flag is required. A NUL stays one character, so
        // the count written above remains correct.
        $this->_data .= str_replace("\x00", "\xC0\x80", $str);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length, stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        $length = $this->readVInt();
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }
}
|
||||
|
176
thirdparty/Zend/Search/Lucene/TermStreamsPriorityQueue.php
vendored
Normal file
176
thirdparty/Zend/Search/Lucene/TermStreamsPriorityQueue.php
vendored
Normal file
@ -0,0 +1,176 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @version $Id: TermStreamsPriorityQueue.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_TermStreamsPriorityQueue implements Zend_Search_Lucene_Index_TermsStream_Interface
{
    /**
     * Array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     *
     * @var array
     */
    protected $_termStreams;

    /**
     * Terms stream queue; null once the stream has been closed
     *
     * @var Zend_Search_Lucene_Index_TermsPriorityQueue|null
     */
    protected $_termsStreamQueue = null;

    /**
     * Last Term in a terms stream
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    protected $_lastTerm = null;


    /**
     * Object constructor
     *
     * Stores the streams and immediately positions the merged stream on
     * its first term.
     *
     * @param array $termStreams  array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     */
    public function __construct(array $termStreams)
    {
        $this->_termStreams = $termStreams;

        $this->resetTermsStream();
    }

    /**
     * Reset terms stream.
     *
     * Resets every underlying stream, rebuilds the queue from the
     * non-empty ones and moves to the first term.
     */
    public function resetTermsStream()
    {
        /** Zend_Search_Lucene_Index_TermsPriorityQueue */
        require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

        $this->_termsStreamQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        foreach ($this->_termStreams as $termStream) {
            $termStream->resetTermsStream();

            // Skip "empty" containers
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termsStreamQueue === null) {
            // Stream was closed: there is nothing left to skip through
            $this->_lastTerm = null;
            return;
        }

        // Drain the queue first: skipping changes each stream's current
        // term, so every stream has to be re-inserted afterwards.
        $termStreams = array();

        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $termStreams[] = $termStream;
        }

        foreach ($termStreams as $termStream) {
            $termStream->skipTo($prefix);

            // Streams with no term left after skipping are dropped
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Scans term streams and returns next term
     *
     * A term present in several streams is reported once only.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_termsStreamQueue === null) {
            // Stream was closed
            $this->_lastTerm = null;
            return null;
        }

        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $nextStream = $this->_termsStreamQueue->top();

            // The popped term is "new" unless the stream that is now on top
            // of the queue is positioned on a term with the same key.
            $isNewTerm = $nextStream === null
                      || $nextStream->currentTerm()->key() != $termStream->currentTerm()->key();

            if ($isNewTerm) {
                // Remember the term before the stream is advanced
                $this->_lastTerm = $termStream->currentTerm();
            }

            if ($termStream->nextTerm() !== null) {
                // Put segment back into the priority queue
                $this->_termsStreamQueue->put($termStream);
            }

            if ($isNewTerm) {
                return $this->_lastTerm;
            }
        }

        // End of stream
        $this->_lastTerm = null;

        return null;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end.
     * Calling it again on an already closed stream does nothing.
     */
    public function closeTermsStream()
    {
        if ($this->_termsStreamQueue !== null) {
            // Close every stream still waiting in the queue
            while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
                $termStream->closeTermsStream();
            }
        }

        $this->_termsStreamQueue = null;
        $this->_lastTerm         = null;
    }
}
|
Loading…
Reference in New Issue
Block a user