mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 09:06:00 +00:00
Merge pull request #9 from chillu/pulls/tika-logging
Improved Tika error logging
This commit is contained in:
commit
6cf09f26c8
12
README.md
12
README.md
@ -146,9 +146,9 @@ each HTTP request is run synchronously.
|
|||||||
|
|
||||||
### Tika
|
### Tika
|
||||||
|
|
||||||
Support for Apache Tika (1.7 and above) is included. This can be run in one of two ways: Server or CLI.
|
Support for Apache Tika (1.8 and above) is included. This can be run in one of two ways: Server or CLI.
|
||||||
|
|
||||||
See [the Apache Tika home page](http://tika.apache.org/1.7/index.html) for instructions on installing and
|
See [the Apache Tika home page](http://tika.apache.org/1.8/index.html) for instructions on installing and
|
||||||
configuring this. Download the latest `tika-app` for running as a CLI script, or `tika-server` if you're planning
|
configuring this. Download the latest `tika-app` for running as a CLI script, or `tika-server` if you're planning
|
||||||
to have it running constantly in the background. Starting tika as a CLI script for every extraction request
|
to have it running constantly in the background. Starting tika as a CLI script for every extraction request
|
||||||
is fairly slow, so we recommend running it as a server.
|
is fairly slow, so we recommend running it as a server.
|
||||||
@ -186,6 +186,14 @@ java -jar tika-server-1.8.jar --host=localhost --port=9998
|
|||||||
While you can run `tika-app-1.8.jar` in server mode as well (with the `--server` flag),
|
While you can run `tika-app-1.8.jar` in server mode as well (with the `--server` flag),
|
||||||
it behaves differently and is not recommended.
|
it behaves differently and is not recommended.
|
||||||
|
|
||||||
|
The module will log extraction errors with `SS_Log::NOTICE` priority by default,
|
||||||
|
for example a "422 Unprocessable Entity" HTTP response for an encrypted PDF.
|
||||||
|
In case you want more information on why processing failed, you can increase
|
||||||
|
the logging verbosity in the tika server instance by passing through
|
||||||
|
a `--includeStack` flag. Logs can be passed on to files or external logging services,
|
||||||
|
see [error handling](http://doc.silverstripe.org/en/developer_guides/debugging/error_handling)
|
||||||
|
documentation for SilverStripe core.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Manual extraction:
|
Manual extraction:
|
||||||
|
@ -56,19 +56,39 @@ class TikaRestClient extends Client {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract text content from a given file
|
* Extract text content from a given file.
|
||||||
|
* Logs a notice-level error if the document can't be parsed.
|
||||||
*
|
*
|
||||||
* @param string $file Full filesystem path to a file to post
|
* @param string $file Full filesystem path to a file to post
|
||||||
* @return string Content of the file extracted as plain text
|
* @return string Content of the file extracted as plain text
|
||||||
*/
|
*/
|
||||||
public function tika($file) {
|
public function tika($file) {
|
||||||
$response = $this->put(
|
$text = null;
|
||||||
'tika',
|
try {
|
||||||
array('Accept' => 'text/plain'),
|
$response = $this->put(
|
||||||
file_get_contents($file)
|
'tika',
|
||||||
)->send();
|
array('Accept' => 'text/plain'),
|
||||||
|
file_get_contents($file)
|
||||||
|
)->send();
|
||||||
|
$text = $response->getBody(true);
|
||||||
|
} catch(RequestException $e) {
|
||||||
|
$msg = sprintf(
|
||||||
|
'TikaRestClient was not able to process %s. Response: %s %s.',
|
||||||
|
$file,
|
||||||
|
$e->getResponse()->getStatusCode(),
|
||||||
|
$e->getResponse()->getReasonPhrase()
|
||||||
|
);
|
||||||
|
|
||||||
return $response->getBody(true);
|
// Only available if tika-server was started with --includeStack
|
||||||
|
$body = $e->getResponse()->getBody(true);
|
||||||
|
if($body) {
|
||||||
|
$msg .= ' Body: ' . $body;
|
||||||
|
}
|
||||||
|
|
||||||
|
SS_Log::log($msg, SS_Log::NOTICE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user