API FileTextExtractor::getContent now accepts either a File instance or a file path string

This commit is contained in:
Robbie Averill 2018-07-03 17:03:47 +12:00
parent 40e4b05f5d
commit 397e7a5d40
6 changed files with 17 additions and 17 deletions

View File

@ -181,8 +181,8 @@ abstract class FileTextExtractor
/**
* Given a File instance or a path to a file, extract the contents as text.
*
* @param File $file
* @param File|string $file Either the File instance, or a file path for a file to load
* @return string
*/
abstract public function getContent(File $file);
abstract public function getContent($file);
}

View File

@ -54,9 +54,9 @@ class HTMLTextExtractor extends FileTextExtractor
* @param File|string $file Either the File instance, or a file path for a file to load
* @return string
*/
public function getContent(File $file)
public function getContent($file)
{
$content = $file->getString();
$content = $file instanceof File ? $file->getString() : file_get_contents($file);
// Yes, yes, regex'ing HTML is evil.
// Since we don't care about well-formedness or markup here, it does the job.

View File

@ -84,9 +84,9 @@ class PDFTextExtractor extends FileTextExtractor
return null;
}
public function getContent(File $file)
public function getContent($file)
{
if (!$file) {
if (!$file || (is_string($file) && !file_exists($file))) {
// no file
return '';
}
@ -97,17 +97,17 @@ class PDFTextExtractor extends FileTextExtractor
/**
* Invoke pdftotext with the given File object or file path
*
* @param File $file
* @param File|string $file
* @return string Output
* @throws Exception
*/
protected function getRawOutput(File $file)
protected function getRawOutput($file)
{
if (!$this->isAvailable()) {
throw new Exception("getRawOutput called on unavailable extractor");
}
$path = $this->getPathFromFile($file);
$path = $file instanceof File ? $this->getPathFromFile($file) : $file;
exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err);
if ($err) {
if (!is_array($err) && $err == 1) {

View File

@ -99,18 +99,18 @@ class SolrCellTextExtractor extends FileTextExtractor
}
/**
* @param File $file
* @param File|string $file
* @return string
* @throws InvalidArgumentException
*/
public function getContent(File $file)
public function getContent($file)
{
if (!$file) {
if (!$file || (is_string($file) && !file_exists($file))) {
// no file
return '';
}
$fileName = $file->getFilename();
$fileName = $file instanceof File ? $file->getFilename() : basename($file);
$client = $this->getHttpClient();
// Get and validate base URL

View File

@ -124,9 +124,9 @@ class TikaServerTextExtractor extends FileTextExtractor
return false;
}
public function getContent(File $file)
public function getContent($file)
{
$tempFile = $this->getPathFromFile($file);
$tempFile = $file instanceof File ? $this->getPathFromFile($file) : $file;
return $this->getClient()->tika($tempFile);
}
}

View File

@ -74,10 +74,10 @@ class TikaTextExtractor extends FileTextExtractor
return proc_close($proc);
}
public function getContent(File $file)
public function getContent($file)
{
$mode = $this->config()->get('output_mode');
$path = $this->getPathFromFile($file);
$path = $file instanceof File ? $this->getPathFromFile($file) : $file;
$command = sprintf('tika %s %s', $mode, escapeshellarg($path));
$code = $this->runShell($command, $output);