API FileTextExtractor::getContent now accepts either a File instance or a file path string

This commit is contained in:
Robbie Averill 2018-07-03 17:03:47 +12:00
parent 40e4b05f5d
commit 397e7a5d40
6 changed files with 17 additions and 17 deletions

View File

@ -181,8 +181,8 @@ abstract class FileTextExtractor
/** /**
* Given a File instance, extract the contents as text. * Given a File instance, extract the contents as text.
* *
* @param File $file * @param File|string $file Either the File instance, or a file path for a file to load
* @return string * @return string
*/ */
abstract public function getContent(File $file); abstract public function getContent($file);
} }

View File

@ -54,9 +54,9 @@ class HTMLTextExtractor extends FileTextExtractor
* @param File $file * @param File $file
* @return string * @return string
*/ */
public function getContent(File $file) public function getContent($file)
{ {
$content = $file->getString(); $content = $file instanceof File ? $file->getString() : file_get_contents($file);
// Yes, yes, regex'ing HTML is evil. // Yes, yes, regex'ing HTML is evil.
// Since we don't care about well-formedness or markup here, it does the job. // Since we don't care about well-formedness or markup here, it does the job.

View File

@ -84,9 +84,9 @@ class PDFTextExtractor extends FileTextExtractor
return null; return null;
} }
public function getContent(File $file) public function getContent($file)
{ {
if (!$file) { if (!$file || (is_string($file) && !file_exists($file))) {
// no file // no file
return ''; return '';
} }
@ -97,17 +97,17 @@ class PDFTextExtractor extends FileTextExtractor
/** /**
* Invoke pdftotext with the given File object * Invoke pdftotext with the given File object
* *
* @param File $file * @param File|string $file
* @return string Output * @return string Output
* @throws Exception * @throws Exception
*/ */
protected function getRawOutput(File $file) protected function getRawOutput($file)
{ {
if (!$this->isAvailable()) { if (!$this->isAvailable()) {
throw new Exception("getRawOutput called on unavailable extractor"); throw new Exception("getRawOutput called on unavailable extractor");
} }
$path = $this->getPathFromFile($file); $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err); exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err);
if ($err) { if ($err) {
if (!is_array($err) && $err == 1) { if (!is_array($err) && $err == 1) {

View File

@ -99,18 +99,18 @@ class SolrCellTextExtractor extends FileTextExtractor
} }
/** /**
* @param File $file * @param File|string $file
* @return string * @return string
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function getContent(File $file) public function getContent($file)
{ {
if (!$file) { if (!$file || (is_string($file) && !file_exists($file))) {
// no file // no file
return ''; return '';
} }
$fileName = $file->getFilename(); $fileName = $file instanceof File ? $file->getFilename() : basename($file);
$client = $this->getHttpClient(); $client = $this->getHttpClient();
// Get and validate base URL // Get and validate base URL

View File

@ -124,9 +124,9 @@ class TikaServerTextExtractor extends FileTextExtractor
return false; return false;
} }
public function getContent(File $file) public function getContent($file)
{ {
$tempFile = $this->getPathFromFile($file); $tempFile = $file instanceof File ? $this->getPathFromFile($file) : $file;
return $this->getClient()->tika($tempFile); return $this->getClient()->tika($tempFile);
} }
} }

View File

@ -74,10 +74,10 @@ class TikaTextExtractor extends FileTextExtractor
return proc_close($proc); return proc_close($proc);
} }
public function getContent(File $file) public function getContent($file)
{ {
$mode = $this->config()->get('output_mode'); $mode = $this->config()->get('output_mode');
$path = $this->getPathFromFile($file); $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
$command = sprintf('tika %s %s', $mode, escapeshellarg($path)); $command = sprintf('tika %s %s', $mode, escapeshellarg($path));
$code = $this->runShell($command, $output); $code = $this->runShell($command, $output);