From b32bc08dc490ad1b7b4d43aa83a57f3195a9893d Mon Sep 17 00:00:00 2001
From: Ingo Schommer <ingo@silverstripe.com>
Date: Tue, 7 May 2013 19:27:06 +0200
Subject: [PATCH] More resilience in SolrCellTextExtractor

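The extractor shouldn't outright fail the request if a file can't be
found: it now logs a notice and returns null instead. It also throws a
clear error early when SolrCellTextExtractor.base_url is not configured.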
---
 code/extractors/SolrCellTextExtractor.php | 25 +++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/code/extractors/SolrCellTextExtractor.php b/code/extractors/SolrCellTextExtractor.php
index 103bfe8..6bd1b6d 100644
--- a/code/extractors/SolrCellTextExtractor.php
+++ b/code/extractors/SolrCellTextExtractor.php
@@ -23,6 +23,9 @@ class SolrCellTextExtractor extends FileTextExtractor {
 	protected $httpClient;
 
 	public function getHttpClient() {
+		if(!$this->config()->get('base_url')) {
+			throw new InvalidArgumentException('SolrCellTextExtractor.base_url not specified');
+		}
 		if(!$this->httpClient) $this->httpClient = new Client($this->config()->get('base_url'));
 		return $this->httpClient;
 	}
@@ -53,16 +56,30 @@ class SolrCellTextExtractor extends FileTextExtractor {
 		
 		$fileName = basename($path);
 		$client = $this->getHttpClient();
-		$request = $client
-			->post('?extractOnly=true&extractFormat=text')
-			->addPostFiles(array('myfile' => $path));
-		$response = $request->send();
+		try {
+			$request = $client
+				->post()
+				->addPostFields(array('extractOnly' => 'true', 'extractFormat' => 'text'))
+				->addPostFiles(array('myfile' => $path));
+			$response = $request->send();
+		} catch(InvalidArgumentException $e) {
+			SS_Log::log(
+				sprintf(
+					'Error extracting text from "%s" (message: %s)', 
+					$path, 
+					$e->getMessage()
+				),
+				SS_Log::NOTICE
+			);
+			return null;
+		}
 		// Use preg match to avoid SimpleXML running out of memory on large text nodes
 		preg_match(
 			sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
 			(string)$response->getBody(), 
 			$matches
 		);
+
 		return $matches ? $matches[1] : null;
 	}
 }
\ No newline at end of file