From a34c443be5af471716efe38d7381ef456892c332 Mon Sep 17 00:00:00 2001 From: cam-findlay Date: Fri, 7 Jun 2013 10:42:38 +1200 Subject: [PATCH] FIX additional exception handling for Tika errors returned via Guzzle. Tika server errors via Guzzle can cause the Solr search query to return a 500 error and break search results pages for users. The issue was related to uncaught exceptions from Guzzle causing a silent failure if a text file is perhaps unreadable or missing (the return null is never reached, which breaks the search). --- code/extractors/SolrCellTextExtractor.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/code/extractors/SolrCellTextExtractor.php b/code/extractors/SolrCellTextExtractor.php index 6bd1b6d..fb37e70 100644 --- a/code/extractors/SolrCellTextExtractor.php +++ b/code/extractors/SolrCellTextExtractor.php @@ -72,7 +72,19 @@ class SolrCellTextExtractor extends FileTextExtractor { SS_Log::NOTICE ); return null; + } catch(Guzzle\Http\Exception\ServerErrorResponseException $e){ + //Catch other errors that Tika can throw via Guzzle which are otherwise uncaught and break the Solr search query in some cases. + SS_Log::log( + sprintf( + 'Tika server error attempting to extract from "%s" (message: %s)', + $path, + $e->getMessage() + ), + SS_Log::NOTICE + ); + return null; } + // Use preg match to avoid SimpleXML running out of memory on large text nodes preg_match( sprintf('/\(.*?)\<\/str\>/s', preg_quote($fileName)), @@ -82,4 +94,4 @@ class SolrCellTextExtractor extends FileTextExtractor { return $matches ? $matches[1] : null; } -} \ No newline at end of file +}