mirror of
https://github.com/silverstripe/silverstripe-docsviewer
synced 2024-10-22 09:05:56 +00:00
816 lines
27 KiB
PHP
816 lines
27 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* Zend Framework
|
||
|
*
|
||
|
* LICENSE
|
||
|
*
|
||
|
* This source file is subject to the new BSD license that is bundled
|
||
|
* with this package in the file LICENSE.txt.
|
||
|
* It is also available through the world-wide-web at this URL:
|
||
|
* http://framework.zend.com/license/new-bsd
|
||
|
* If you did not receive a copy of the license and are unable to
|
||
|
* obtain it through the world-wide-web, please send an email
|
||
|
* to license@zend.com so we can send you a copy immediately.
|
||
|
*
|
||
|
* @category Zend
|
||
|
* @package Zend_Search_Lucene
|
||
|
* @subpackage Search
|
||
|
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||
|
* @version $Id: Boolean.php 20096 2010-01-06 02:05:09Z bkarwin $
|
||
|
*/
|
||
|
|
||
|
|
||
|
/** Zend_Search_Lucene_Search_Query */
|
||
|
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @category Zend
|
||
|
* @package Zend_Search_Lucene
|
||
|
* @subpackage Search
|
||
|
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
|
||
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||
|
*/
|
||
|
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
|
||
|
{
|
||
|
|
||
|
/**
 * Subqueries
 * Array of Zend_Search_Lucene_Search_Query
 *
 * @var array
 */
private $_subqueries = array();

/**
 * Subqueries signs, keyed like $_subqueries.
 * If true then subquery is required.
 * If false then subquery is prohibited.
 * If null then subquery is neither prohibited, nor required
 *
 * If the property itself is null then all subqueries are required
 *
 * @var array|null
 */
private $_signs = array();

/**
 * Result vector: an array with matched document ids as keys.
 * Stays null until one of the _calculate*Result() methods has filled it.
 *
 * @var array|null
 */
private $_resVector = null;

/**
 * A score factor based on the fraction of all query subqueries
 * that a document contains.
 * float for conjunction queries
 * array of float for non conjunction queries
 *
 * Stays null until the first scoring call computes it.
 *
 * @var mixed
 */
private $_coord = null;
|
||
|
|
||
|
|
||
|
/**
 * Class constructor. Create a new Boolean query object.
 *
 * Nothing is initialised unless $subqueries is an array. When it is, the
 * signs are stored as null ("all subqueries are required") unless $signs is
 * an array containing at least one entry that is not strictly true; in that
 * case the $signs array is stored as given.
 *
 * Note that omitting $signs makes every subquery required, which differs
 * from the default of addSubquery().
 *
 * @param array $subqueries Array of Zend_Search_Lucene_Search_Query objects
 * @param array $signs      Array of signs. Sign is boolean|null.
 */
public function __construct($subqueries = null, $signs = null)
{
    if (!is_array($subqueries)) {
        // Nothing to initialise, keep the property defaults
        return;
    }

    $this->_subqueries = $subqueries;

    // Start from the compact "all required" representation
    $this->_signs = null;

    if (!is_array($signs)) {
        return;
    }

    foreach ($signs as $flag) {
        if ($flag === true) {
            continue;
        }

        // Found an optional or prohibited entry: explicit signs are needed
        $this->_signs = $signs;
        return;
    }
}
|
||
|
|
||
|
|
||
|
/**
 * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
 *
 * The sign is specified as:
 *     TRUE  - subquery is required
 *     FALSE - subquery is prohibited
 *     NULL  - subquery is neither prohibited, nor required
 *
 * While the signs are stored as null ("all required") and the new sign is
 * TRUE, no sign is recorded. Otherwise the signs list is first expanded to
 * one TRUE per existing subquery (if it was null) and the new sign appended.
 *
 * @param  Zend_Search_Lucene_Search_Query $subquery
 * @param  boolean|null $sign
 * @return void
 */
public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null)
{
    $allRequiredSoFar = ($this->_signs === null);

    // A required subquery added to an all-required query needs no explicit sign
    if (!($sign === true && $allRequiredSoFar)) {
        if ($allRequiredSoFar) {
            // Materialise the implicit "required" sign of every earlier subquery
            $this->_signs = array_pad(array(), count($this->_subqueries), true);
        }

        $this->_signs[] = $sign;
    }

    $this->_subqueries[] = $subquery;
}
|
||
|
|
||
|
/**
 * Re-write queries into primitive queries
 *
 * Builds a new boolean query with the same boost, whose subqueries are the
 * rewritten versions of this query's subqueries with their signs preserved.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_subqueries as $key => $child) {
        $rewrittenChild = $child->rewrite($index);

        // null signs mean every subquery is required
        if ($this->_signs === null) {
            $childSign = true;
        } else {
            $childSign = $this->_signs[$key];
        }

        $rewritten->addSubquery($rewrittenChild, $childSign);
    }

    return $rewritten;
}
|
||
|
|
||
|
/**
 * Optimize query in the context of specified index
 *
 * Steps, in order:
 *  1. optimize every subquery;
 *  2. drop Insignificant subqueries; return an Insignificant query if none
 *     remain or if all remaining ones are prohibited;
 *  3. return an Empty query if a required subquery is Empty; drop optional
 *     or prohibited Empty subqueries; return an Empty query if none remain
 *     or if all remaining ones are prohibited;
 *  4. if exactly one subquery remains, return it (cloned with its boost
 *     multiplied by this query's boost, unless this query's boost is 1);
 *  5. otherwise try to merge Term and MultiTerm subqueries into fewer
 *     Term/MultiTerm clauses and return the resulting query.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    // $subqueries and $signs share the same keys; entries are always removed in pairs
    $subqueries = array();
    $signs = array();

    // Optimize all subqueries
    foreach ($this->_subqueries as $id => $subquery) {
        $subqueries[] = $subquery->optimize($index);
        $signs[] = ($this->_signs === null)? true : $this->_signs[$id];
    }

    // Remove insignificant subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
            // Insignificant subquery has to be removed anyway
            unset($subqueries[$id]);
            unset($signs[$id]);
        }
    }
    if (count($subqueries) == 0) {
        // Boolean query has no non-insignificant subqueries
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }
    // Check if all non-insignificant subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }


    // Check for empty subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
            if ($signs[$id] === true) {
                // Matching is required, but is actually empty
                require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
                return new Zend_Search_Lucene_Search_Query_Empty();
            } else {
                // Matching is optional or prohibited, but is empty
                // Remove it from subqueries and signs list
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
    }

    // Check, if reduced subqueries list is empty
    if (count($subqueries) == 0) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Check if all non-empty subqueries are prohibited
    $allProhibited = true;
    foreach ($signs as $sign) {
        if ($sign !== false) {
            $allProhibited = false;
            break;
        }
    }
    if ($allProhibited) {
        require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
        return new Zend_Search_Lucene_Search_Query_Empty();
    }


    // Check, if reduced subqueries list has only one entry
    if (count($subqueries) == 1) {
        // It's a query with only one required or optional clause
        // (it has already been checked that it's not a prohibited clause)

        if ($this->getBoost() == 1) {
            return reset($subqueries);
        }

        // Clone so that the boost of the original subquery object is left untouched
        $optimizedQuery = clone reset($subqueries);
        $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());

        return $optimizedQuery;
    }


    // Prepare first candidate for optimized query.
    // The arrays are passed by value, so the removals below do not affect it.
    $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
    $optimizedQuery->setBoost($this->getBoost());


    // Terms collected from decomposed subqueries; the three arrays share the same keys
    $terms = array();
    $tsigns = array();
    $boostFactors = array();

    // Try to decompose term and multi-term subqueries
    foreach ($subqueries as $id => $subquery) {
        if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
            $terms[] = $subquery->getTerm();
            $tsigns[] = $signs[$id];
            $boostFactors[] = $subquery->getBoost();

            // remove subquery from a subqueries list
            unset($subqueries[$id]);
            unset($signs[$id]);
        } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
            $subTerms = $subquery->getTerms();
            $subSigns = $subquery->getSigns();

            if ($signs[$id] === true) {
                // It's a required multi-term subquery.
                // Something like '... +(+term1 -term2 term3 ...) ...'

                // Multi-term required subquery can be decomposed only if it contains
                // required terms and doesn't contain prohibited terms:
                // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                //
                // Check this
                $hasRequired = false;
                $hasProhibited = false;
                if ($subSigns === null) {
                    // All subterms are required
                    $hasRequired = true;
                } else {
                    foreach ($subSigns as $sign) {
                        if ($sign === true) {
                            $hasRequired = true;
                        } else if ($sign === false) {
                            $hasProhibited = true;
                            break;
                        }
                    }
                }
                // Leave the subquery as is if it has prohibited terms or doesn't have required terms
                if ($hasProhibited || !$hasRequired) {
                    continue;
                }

                foreach ($subTerms as $termId => $term) {
                    $terms[] = $term;
                    $tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
                    $boostFactors[] = $subquery->getBoost();
                }

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);

            } else { // $signs[$id] === null || $signs[$id] === false
                // It's an optional or prohibited multi-term subquery.
                // Something like '... (+term1 -term2 term3 ...) ...'
                // or
                // something like '... -(+term1 -term2 term3 ...) ...'

                // Multi-term optional and prohibited subqueries can be decomposed
                // only if all terms are optional.
                //
                // Check if all terms are optional.
                $onlyOptional = true;
                if ($subSigns === null) {
                    // All subterms are required
                    $onlyOptional = false;
                } else {
                    foreach ($subSigns as $sign) {
                        if ($sign !== null) {
                            $onlyOptional = false;
                            break;
                        }
                    }
                }

                // Leave the subquery as is if it contains non-optional terms
                if (!$onlyOptional) {
                    continue;
                }

                // Every term inherits the sign of the enclosing subquery
                foreach ($subTerms as $termId => $term) {
                    $terms[] = $term;
                    $tsigns[] = ($signs[$id] === null)? null /* optional */ :
                                                         false /* prohibited */;
                    $boostFactors[] = $subquery->getBoost();
                }

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
    }


    // Check, if there are no decomposed subqueries
    if (count($terms) == 0 ) {
        // return prepared candidate
        return $optimizedQuery;
    }


    // Check, if all subqueries have been decomposed and all terms have the same boost factor
    if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
        $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());

        return $optimizedQuery;
    }


    // This boolean query can't be transformed to Term/MultiTerm query and still contains
    // several subqueries

    // Separate prohibited terms
    $prohibitedTerms = array();
    foreach ($terms as $id => $term) {
        if ($tsigns[$id] === false) {
            $prohibitedTerms[] = $term;

            unset($terms[$id]);
            unset($tsigns[$id]);
            unset($boostFactors[$id]);
        }
    }

    // Fold the remaining (required/optional) terms back into a single clause where possible
    if (count($terms) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
        $clause->setBoost(reset($boostFactors));

        $subqueries[] = $clause;
        $signs[] = reset($tsigns);

        // Clear terms list
        $terms = array();
    } else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
        $clause->setBoost(reset($boostFactors));

        $subqueries[] = $clause;
        // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
        $signs[] = (in_array(true, $tsigns))? true : null;

        // Clear terms list
        $terms = array();
    }

    // Fold the prohibited terms into a single prohibited clause
    if (count($prohibitedTerms) == 1) {
        // (boost factors are not significant for prohibited clauses)
        require_once 'Zend/Search/Lucene/Search/Query/Term.php';
        $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
        $signs[] = false;

        // Clear prohibited terms list
        $prohibitedTerms = array();
    } else if (count($prohibitedTerms) > 1) {
        // prepare signs array
        $prohibitedSigns = array();
        foreach ($prohibitedTerms as $id => $term) {
            // all prohibited terms are grouped as optional into multi-term query
            $prohibitedSigns[$id] = null;
        }

        // (boost factors are not significant for prohibited clauses)
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
        // Clause sign is 'prohibited'
        $signs[] = false;

        // Clear prohibited terms list
        $prohibitedTerms = array();
    }

    /** @todo Group terms with the same boost factors together */

    // Replace the candidate only if all terms have been processed; terms with
    // differing boost factors stay unprocessed and the first candidate is returned
    if (count($terms) == 0 && count($prohibitedTerms) == 0) {
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());
    }

    return $optimizedQuery;
}
|
||
|
|
||
|
/**
 * Returns subqueries
 *
 * @return array Array of Zend_Search_Lucene_Search_Query objects, keyed as stored internally
 */
public function getSubqueries()
{
    return $this->_subqueries;
}
|
||
|
|
||
|
|
||
|
/**
 * Return subqueries signs
 *
 * Each sign is true (required), false (prohibited) or null (optional).
 * The returned value itself is null when all subqueries are required.
 *
 * @return array|null
 */
public function getSigns()
{
    return $this->_signs;
}
|
||
|
|
||
|
|
||
|
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * The created weight object is also stored in $this->_weight.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';

    $booleanWeight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
    $this->_weight = $booleanWeight;

    return $this->_weight;
}
|
||
|
|
||
|
|
||
|
/**
 * Calculate result vector for Conjunction query
 * (like '<subquery1> AND <subquery2> AND <subquery3>')
 *
 * The result vector becomes the key-wise intersection of the matchedDocs()
 * vectors of all subqueries, starting from the smallest vector. Values are
 * taken from the smallest vector. With no subqueries the result is an
 * empty array.
 *
 * @return void
 */
private function _calculateConjunctionResult()
{
    $this->_resVector = (count($this->_subqueries) == 0) ? array() : null;

    $vectors     = array();
    $vectorSizes = array();
    $vectorIds   = array(); // tie-breaker, prevents array_multisort() from comparing arrays

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $matched = $subquery->matchedDocs();

        $vectors[]     = $matched;
        $vectorSizes[] = count($matched);
        $vectorIds[]   = $subqueryId;
    }

    // Order vectors by increasing cardinality so the working set stays small
    array_multisort($vectorSizes, SORT_ASC, SORT_NUMERIC,
                    $vectorIds,   SORT_ASC, SORT_NUMERIC,
                    $vectors);

    foreach ($vectors as $candidate) {
        if ($this->_resVector === null) {
            // First (smallest) vector seeds the result
            $this->_resVector = $candidate;
        } else {
            // Manual key intersection, kept as a workaround for
            // array_intersect_key() slowness
            $intersection = array();
            foreach ($this->_resVector as $docId => $docValue) {
                if (isset($candidate[$docId])) {
                    $intersection[$docId] = $docValue;
                }
            }
            $this->_resVector = $intersection;
        }

        if (count($this->_resVector) == 0) {
            // Nothing can match any more, skip the remaining vectors
            break;
        }
    }

    // No ksort() needed: the intersection above keeps the elements order
}
|
||
|
|
||
|
|
||
|
/**
 * Calculate result vector for non Conjunction query
 * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
 *
 * If the query has required subqueries, the result vector is the key-wise
 * intersection of their matchedDocs() vectors and optional matches are not
 * added to it. Otherwise it is the union of the optional subqueries'
 * vectors. Prohibited subqueries are ignored here; _nonConjunctionScore()
 * returns 0 for documents they match. The result is sorted by key.
 *
 * @return void
 */
private function _calculateNonConjunctionResult()
{
    $mandatoryVectors = array();
    $mandatorySizes   = array();
    $mandatoryIds     = array(); // tie-breaker, prevents array_multisort() from comparing arrays

    $optional = array();

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $sign = $this->_signs[$subqueryId];

        if ($sign === false) {
            // Prohibited: matchedDocs() may include non-matching ids and
            // computing the vector could be costly, so it is skipped.
            continue;
        }

        if ($sign === true) {
            // Required
            $matched = $subquery->matchedDocs();

            $mandatoryVectors[] = $matched;
            $mandatorySizes[]   = count($matched);
            $mandatoryIds[]     = $subqueryId;
        } else {
            // Neither required nor prohibited: array union, first value wins
            $optional += $subquery->matchedDocs();
        }
    }

    // Order required vectors by increasing cardinality
    array_multisort($mandatorySizes, SORT_ASC, SORT_NUMERIC,
                    $mandatoryIds,   SORT_ASC, SORT_NUMERIC,
                    $mandatoryVectors);

    $required = null;
    foreach ($mandatoryVectors as $candidate) {
        if ($required === null) {
            $required = $candidate;
        } else {
            // Manual key intersection, kept as a workaround for
            // array_intersect_key() slowness
            $intersection = array();
            foreach ($required as $docId => $docValue) {
                if (isset($candidate[$docId])) {
                    $intersection[$docId] = $docValue;
                }
            }
            $required = $intersection;
        }

        if (count($required) == 0) {
            // Nothing can match any more, skip the remaining vectors
            break;
        }
    }

    if ($required === null) {
        // No required subqueries at all: fall back to the optional matches
        $this->_resVector = &$optional;
    } else {
        $this->_resVector = &$required;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
|
||
|
|
||
|
|
||
|
/**
 * Score calculator for conjunction queries (all subqueries are required)
 *
 * Returns 0 as soon as one subquery scores 0 for the document. Otherwise
 * returns the sum of the subquery scores, scaled by the coord factor and
 * by this query's boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // Computed once and cached: all subqueries out of all subqueries overlap
        $subqueryCount = count($this->_subqueries);
        $this->_coord  = $reader->getSimilarity()->coord($subqueryCount, $subqueryCount);
    }

    $score = 0;

    foreach ($this->_subqueries as $subquery) {
        $subscore = $subquery->score($docId, $reader);

        if ($subscore == 0) {
            // A required subquery doesn't match, so the document doesn't match
            return 0;
        }

        // Reuse the score computed above instead of scoring the subquery a second time
        $score += $subscore * $this->_coord;
    }

    // NOTE(review): the coord factor is applied both per subquery above and
    // once more here, unlike _nonConjunctionScore() which applies it once.
    // Kept as is to preserve existing scores - confirm whether it is intended.
    return $score * $this->_coord * $this->getBoost();
}
|
||
|
|
||
|
|
||
|
/**
 * Score calculator for non conjunction queries (not all subqueries are required)
 *
 * Returns 0 if a prohibited subquery matches the document or a required
 * one doesn't. Otherwise returns the sum of the non-zero subquery scores,
 * multiplied by the coord factor for the number of matched subqueries and
 * by this query's boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // Build and cache the coord factor table, indexed by matched subqueries count
        $this->_coord = array();

        // Only required and optional subqueries can contribute to the overlap
        $nonProhibited = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                continue;
            }
            ++$nonProhibited;
        }

        $overlap = 0;
        while ($overlap <= $nonProhibited) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $nonProhibited);
            ++$overlap;
        }
    }

    $total        = 0;
    $matchedCount = 0;

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $subscore = $subquery->score($docId, $reader);
        $matches  = ($subscore != 0);
        $sign     = $this->_signs[$subqueryId];

        if ($matches) {
            if ($sign === false) {
                // Prohibited subquery matches the document
                return 0;
            }

            $matchedCount++;
            $total += $subscore;
        } else if ($sign === true) {
            // Required subquery doesn't match the document
            return 0;
        }
    }

    return $total * $this->_coord[$matchedCount] * $this->getBoost();
}
|
||
|
|
||
|
/**
 * Execute query in context of index reader
 * It also initializes necessary internal structures
 *
 * Every subquery is executed; the documents filter is handed only to
 * required subqueries. Afterwards the result vector is calculated with the
 * conjunction or the non conjunction algorithm, depending on whether the
 * signs are null (all subqueries required).
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return void
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // Initialize weight if it's not done yet
    $this->_initWeight($reader);

    if ($docsFilter === null) {
        // Create local documents filter if it's not provided by upper query
        require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
        $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    }

    // Strict check, consistent with the rest of the class: only null (and
    // not an empty signs array) means "all subqueries are required"
    $isConjunction = ($this->_signs === null);

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        if ($isConjunction || $this->_signs[$subqueryId] === true) {
            // Subquery is required
            $subquery->execute($reader, $docsFilter);
        } else {
            // Optional or prohibited subquery is executed without the shared filter
            $subquery->execute($reader);
        }
    }

    if ($isConjunction) {
        $this->_calculateConjunctionResult();
    } else {
        $this->_calculateNonConjunctionResult();
    }
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
 * Get document ids likely matching the query
 *
 * It's an array with document ids as keys (performance considerations)
 *
 * The value is null until a result vector has been calculated by execute().
 *
 * @return array|null
 */
public function matchedDocs()
{
    return $this->_resVector;
}
|
||
|
|
||
|
/**
 * Score specified document
 *
 * Documents that are not present in the result vector score 0. For the
 * others the conjunction or non conjunction calculator is used, depending
 * on whether the signs are null (all subqueries required).
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    return ($this->_signs === null)
         ? $this->_conjunctionScore($docId, $reader)
         : $this->_nonConjunctionScore($docId, $reader);
}
|
||
|
|
||
|
/**
 * Return query terms
 *
 * Collects the terms of all required and optional subqueries; terms of
 * prohibited subqueries are left out.
 *
 * @return array
 */
public function getQueryTerms()
{
    $collected = array();

    foreach ($this->_subqueries as $key => $child) {
        if ($this->_signs !== null && $this->_signs[$key] === false) {
            // Prohibited subquery
            continue;
        }

        $collected = array_merge($collected, $child->getQueryTerms());
    }

    return $collected;
}
|
||
|
|
||
|
/**
 * Query specific matches highlighting
 *
 * Delegates to every required and optional subquery; prohibited subqueries
 * are skipped.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    foreach ($this->_subqueries as $key => $child) {
        if ($this->_signs !== null && $this->_signs[$key] === false) {
            // Prohibited subquery
            continue;
        }

        $child->_highlightMatches($highlighter);
    }
}
|
||
|
|
||
|
/**
 * Print a query
 *
 * Each subquery is rendered as '(<subquery>)', prefixed with '+' if it is
 * required or '-' if it is prohibited, and the parts are joined with single
 * spaces. If the boost is not 1 the whole string is wrapped as
 * '(<query>)^<boost>' with the boost rounded to 4 digits.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping

    $parts = array();

    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs === null || $this->_signs[$id] === true) {
            $prefix = '+';
        } else if ($this->_signs[$id] === false) {
            $prefix = '-';
        } else {
            $prefix = '';
        }

        $parts[] = $prefix . '(' . $subquery->__toString() . ')';
    }

    // Join by position rather than testing for key 0: the subqueries array
    // may not start at key 0 (optimize() passes arrays with removed entries),
    // which used to produce a leading space.
    $query = implode(' ', $parts);

    if ($this->getBoost() != 1) {
        $query = '(' . $query . ')^' . round($this->getBoost(), 4);
    }

    return $query;
}
|
||
|
}
|
||
|
|