From f2c8df2348ca0d2d3affdd23d3f364a8341be534 Mon Sep 17 00:00:00 2001
From: Ingo Schommer <ingo@silverstripe.com>
Date: Mon, 11 Mar 2013 00:56:44 +0100
Subject: [PATCH] BUG Exclude meta info from SolrCell content retrieval

The regex matched greedily up to the last </str> in the response, so the
captured text included the meta info elements as well as the content.
---
 code/extractors/SolrCellTextExtractor.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/code/extractors/SolrCellTextExtractor.php b/code/extractors/SolrCellTextExtractor.php
index def9072..57c6b3a 100644
--- a/code/extractors/SolrCellTextExtractor.php
+++ b/code/extractors/SolrCellTextExtractor.php
@@ -55,7 +55,7 @@ class SolrCellTextExtractor extends FileTextExtractor {
 		$response = $request->send();
 		// Use preg match to avoid SimpleXML running out of memory on large text nodes
 		preg_match(
-			sprintf('/\<str name\="%s"\>(.*)\<\/str\>/s', preg_quote($fileName)),
+			sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
 			(string)$response->getBody(), 
 			$matches
 		);