FIX Update SolrCellTextExtractor to use a Guzzle 6 API implementation

This commit is contained in:
Robbie Averill 2018-07-06 15:43:53 +12:00
parent 231a2091af
commit e1e7cdbfa4

View File

@@ -4,6 +4,7 @@ namespace SilverStripe\TextExtraction\Extractor;
use Exception; use Exception;
use GuzzleHttp\Client; use GuzzleHttp\Client;
use GuzzleHttp\Psr7\Response;
use InvalidArgumentException; use InvalidArgumentException;
use Psr\Log\LoggerInterface; use Psr\Log\LoggerInterface;
use SilverStripe\Assets\File; use SilverStripe\Assets\File;
@@ -120,12 +121,16 @@ class SolrCellTextExtractor extends FileTextExtractor
} }
try { try {
$path = $this->getPathFromFile($file); $stream = $file instanceof File ? $file->getStream() : fopen($file, 'r');
$request = $client /** @var Response $response */
->post($baseUrl) $response = $client
->addPostFields(['extractOnly' => 'true', 'extractFormat' => 'text']) ->post($baseUrl, [
->addPostFiles(['myfile' => $path]); 'multipart' => [
$response = $request->send(); ['name' => 'extractOnly', 'contents' => 'true'],
['name' => 'extractFormat', 'contents' => 'text'],
['name' => 'myfile', 'contents' => $stream],
]
]);
} catch (InvalidArgumentException $e) { } catch (InvalidArgumentException $e) {
$msg = sprintf( $msg = sprintf(
'Error extracting text from "%s" (message: %s)', 'Error extracting text from "%s" (message: %s)',
@@ -133,25 +138,20 @@ class SolrCellTextExtractor extends FileTextExtractor
$e->getMessage() $e->getMessage()
); );
Injector::inst()->get(LoggerInterface::class)->notice($msg); Injector::inst()->get(LoggerInterface::class)->notice($msg);
return null; return null;
} catch (Exception $e) { } catch (Exception $e) {
// Catch other errors that Tika can throw vai Guzzle but are not caught and break Solr search // Catch other errors that Tika can throw via Guzzle but are not caught and break Solr search
// query in some cases. // query in some cases.
$msg = sprintf( $msg = sprintf(
'Tika server error attempting to extract from "%s" (message: %s)', 'Tika server error attempting to extract from "%s" (message: %s)',
$path, $fileName,
$e->getMessage() $e->getMessage()
); );
Injector::inst()->get(LoggerInterface::class)->notice($msg); Injector::inst()->get(LoggerInterface::class)->notice($msg);
return null; return null;
} }
// Just initialise it, it doesn't take much.
$matches = []; $matches = [];
// Use preg match to avoid SimpleXML running out of memory on large text nodes // Use preg match to avoid SimpleXML running out of memory on large text nodes
preg_match( preg_match(
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)), sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),