BUG Exclude meta info from SolrCell content retrieval

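The regex captured greedily up to the last </str> in the response, so the match included too much content (the meta info following the file's text node). It now uses a non-greedy quantifier and stops at the first </str>.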
This commit is contained in:
Ingo Schommer 2013-03-11 00:56:44 +01:00
parent 9af389f51b
commit f2c8df2348

View File

@@ -55,7 +55,7 @@ class SolrCellTextExtractor extends FileTextExtractor {
$response = $request->send();
// Use preg match to avoid SimpleXML running out of memory on large text nodes
preg_match(
sprintf('/\<str name\="%s"\>(.*)\<\/str\>/s', preg_quote($fileName)),
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
(string)$response->getBody(),
$matches
);