API FileTextExtractor::getContent now accepts either a File instance or a file path string

2024-10-22 11:06:00 +02:00 · 2018-07-03 17:03:47 +12:00 · 2018-07-03 17:03:47 +12:00 · 397e7a5d40
commit 397e7a5d40
parent 40e4b05f5d
6 changed files with 17 additions and 17 deletions
--- a/src/Extractor/FileTextExtractor.php
+++ b/src/Extractor/FileTextExtractor.php
@ -181,8 +181,8 @@ abstract class FileTextExtractor
    /**
     * Given a File instance, extract the contents as text.
     *
-     * @param File $file
+     * @param File|string $file Either the File instance, or a file path for a file to load
     * @return string
     */
-    abstract public function getContent(File $file);
+    abstract public function getContent($file);
 }
--- a/src/Extractor/HTMLTextExtractor.php
+++ b/src/Extractor/HTMLTextExtractor.php
@ -54,9 +54,9 @@ class HTMLTextExtractor extends FileTextExtractor
     * @param File $file
     * @return string
     */
-    public function getContent(File $file)
+    public function getContent($file)
    {
-        $content = $file->getString();
+        $content = $file instanceof File ? $file->getString() : file_get_contents($file);

        // Yes, yes, regex'ing HTML is evil.
        // Since we don't care about well-formedness or markup here, it does the job.
--- a/src/Extractor/PDFTextExtractor.php
+++ b/src/Extractor/PDFTextExtractor.php
@ -84,9 +84,9 @@ class PDFTextExtractor extends FileTextExtractor
        return null;
    }

-    public function getContent(File $file)
+    public function getContent($file)
    {
-        if (!$file) {
+        if (!$file || (is_string($file) && !file_exists($file))) {
            // no file
            return '';
        }
@ -97,17 +97,17 @@ class PDFTextExtractor extends FileTextExtractor
    /**
     * Invoke pdftotext with the given File object
     *
-     * @param  File $file
+     * @param  File|string $file
     * @return string Output
     * @throws Exception
     */
-    protected function getRawOutput(File $file)
+    protected function getRawOutput($file)
    {
        if (!$this->isAvailable()) {
            throw new Exception("getRawOutput called on unavailable extractor");
        }

-        $path = $this->getPathFromFile($file);
+        $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
        exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err);
        if ($err) {
            if (!is_array($err) && $err == 1) {
--- a/src/Extractor/SolrCellTextExtractor.php
+++ b/src/Extractor/SolrCellTextExtractor.php
@ -99,18 +99,18 @@ class SolrCellTextExtractor extends FileTextExtractor
    }

    /**
-     * @param File $file
+     * @param File|string $file
     * @return string
     * @throws InvalidArgumentException
     */
-    public function getContent(File $file)
+    public function getContent($file)
    {
-        if (!$file) {
+        if (!$file || (is_string($file) && !file_exists($file))) {
            // no file
            return '';
        }

-        $fileName = $file->getFilename();
+        $fileName = $file instanceof File ? $file->getFilename() : basename($file);
        $client = $this->getHttpClient();

        // Get and validate base URL
--- a/src/Extractor/TikaServerTextExtractor.php
+++ b/src/Extractor/TikaServerTextExtractor.php
@ -124,9 +124,9 @@ class TikaServerTextExtractor extends FileTextExtractor
        return false;
    }

-    public function getContent(File $file)
+    public function getContent($file)
    {
-        $tempFile = $this->getPathFromFile($file);
+        $tempFile = $file instanceof File ? $this->getPathFromFile($file) : $file;
        return $this->getClient()->tika($tempFile);
    }
 }
--- a/src/Extractor/TikaTextExtractor.php
+++ b/src/Extractor/TikaTextExtractor.php
@ -74,10 +74,10 @@ class TikaTextExtractor extends FileTextExtractor
        return proc_close($proc);
    }

-    public function getContent(File $file)
+    public function getContent($file)
    {
        $mode = $this->config()->get('output_mode');
-        $path = $this->getPathFromFile($file);
+        $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
        $command = sprintf('tika %s %s', $mode, escapeshellarg($path));
        $code = $this->runShell($command, $output);