mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 09:06:00 +00:00
More resilience in SolrCellTextExtractor
The request shouldn't fail outright if a file can't be found.
This commit is contained in:
parent
b86483abc4
commit
b32bc08dc4
@ -23,6 +23,9 @@ class SolrCellTextExtractor extends FileTextExtractor {
|
|||||||
protected $httpClient;
|
protected $httpClient;
|
||||||
|
|
||||||
public function getHttpClient() {
|
public function getHttpClient() {
|
||||||
|
if(!$this->config()->get('base_url')) {
|
||||||
|
throw new InvalidArgumentException('SolrCellTextExtractor.base_url not specified');
|
||||||
|
}
|
||||||
if(!$this->httpClient) $this->httpClient = new Client($this->config()->get('base_url'));
|
if(!$this->httpClient) $this->httpClient = new Client($this->config()->get('base_url'));
|
||||||
return $this->httpClient;
|
return $this->httpClient;
|
||||||
}
|
}
|
||||||
@ -53,16 +56,30 @@ class SolrCellTextExtractor extends FileTextExtractor {
|
|||||||
|
|
||||||
$fileName = basename($path);
|
$fileName = basename($path);
|
||||||
$client = $this->getHttpClient();
|
$client = $this->getHttpClient();
|
||||||
|
try {
|
||||||
$request = $client
|
$request = $client
|
||||||
->post('?extractOnly=true&extractFormat=text')
|
->post()
|
||||||
|
->addPostFields(array('extractOnly' => 'true', 'extractFormat' => 'text'))
|
||||||
->addPostFiles(array('myfile' => $path));
|
->addPostFiles(array('myfile' => $path));
|
||||||
$response = $request->send();
|
$response = $request->send();
|
||||||
|
} catch(InvalidArgumentException $e) {
|
||||||
|
SS_Log::log(
|
||||||
|
sprintf(
|
||||||
|
'Error extracting text from "%s" (message: %s)',
|
||||||
|
$path,
|
||||||
|
$e->getMessage()
|
||||||
|
),
|
||||||
|
SS_Log::NOTICE
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
// Use preg match to avoid SimpleXML running out of memory on large text nodes
|
// Use preg match to avoid SimpleXML running out of memory on large text nodes
|
||||||
preg_match(
|
preg_match(
|
||||||
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
|
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
|
||||||
(string)$response->getBody(),
|
(string)$response->getBody(),
|
||||||
$matches
|
$matches
|
||||||
);
|
);
|
||||||
|
|
||||||
return $matches ? $matches[1] : null;
|
return $matches ? $matches[1] : null;
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user