From f2c8df2348ca0d2d3affdd23d3f364a8341be534 Mon Sep 17 00:00:00 2001
From: Ingo Schommer
Date: Mon, 11 Mar 2013 00:56:44 +0100
Subject: [PATCH] BUG Exclude meta info from SolrCell content retrieval

Was matching greedily, which included too much content
---
 code/extractors/SolrCellTextExtractor.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/extractors/SolrCellTextExtractor.php b/code/extractors/SolrCellTextExtractor.php
index def9072..57c6b3a 100644
--- a/code/extractors/SolrCellTextExtractor.php
+++ b/code/extractors/SolrCellTextExtractor.php
@@ -55,7 +55,7 @@ class SolrCellTextExtractor extends FileTextExtractor {
 		$response = $request->send();
 		// Use preg match to avoid SimpleXML running out of memory on large text nodes
 		preg_match(
-			sprintf('/\<str name\="%s"\>(.*)\<\/str\>/s', preg_quote($fileName)),
+			sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
 			(string)$response->getBody(),
 			$matches
 		);