Merge pull request #72 from creative-commoners/pulls/3/php81

ENH PHP 8.1 compatibility
2024-10-22 11:06:00 +02:00 · 2022-04-26 17:58:43 +12:00 · 2022-04-26 17:58:43 +12:00 · 4674084d0d
commit 4674084d0d
parent 77fecc4c53 df8b17ab85
8 changed files with 35 additions and 35 deletions
--- a/src/Cache/FileTextCache/Cache.php
+++ b/src/Cache/FileTextCache/Cache.php
@@ -42,7 +42,7 @@ class Cache implements FileTextCache, Flushable
     */
    protected function getKey(File $file)
    {
-        return md5($file->getFilename());
+        return md5($file->getFilename() ?? '');
    }

    /**
--- a/src/Extractor/FileTextExtractor.php
+++ b/src/Extractor/FileTextExtractor.php
@@ -58,7 +58,7 @@ abstract class FileTextExtractor
        arsort($classPriorities);

        // Save classes
-        $sortedClasses = array_keys($classPriorities);
+        $sortedClasses = array_keys($classPriorities ?? []);
        return self::$sorted_extractor_classes = $sortedClasses;
    }

@@ -81,7 +81,7 @@ abstract class FileTextExtractor
     */
    public static function for_file($file)
    {
-        if (!$file || (is_string($file) && !file_exists($file))) {
+        if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
            return null;
        }

@@ -137,11 +137,11 @@ abstract class FileTextExtractor
        }

        // Remove any existing temp files with this name
-        if (file_exists($path)) {
-            unlink($path);
+        if (file_exists($path ?? '')) {
+            unlink($path ?? '');
        }

-        $bytesWritten = file_put_contents($path, $file->getStream());
+        $bytesWritten = file_put_contents($path ?? '', $file->getStream());
        if (false === $bytesWritten) {
            throw new Exception(static::class . '->getPathFromFile() failed to write temporary file');
        }
--- a/src/Extractor/HTMLTextExtractor.php
+++ b/src/Extractor/HTMLTextExtractor.php
@@ -34,7 +34,7 @@ class HTMLTextExtractor extends FileTextExtractor
     */
    public function supportsExtension($extension)
    {
-        return in_array(strtolower($extension), ["html", "htm", "xhtml"]);
+        return in_array(strtolower($extension ?? ''), ["html", "htm", "xhtml"]);
    }

    /**
@@ -43,7 +43,7 @@ class HTMLTextExtractor extends FileTextExtractor
     */
    public function supportsMime($mime)
    {
-        return strtolower($mime) === 'text/html';
+        return strtolower($mime ?? '') === 'text/html';
    }

    /**
@@ -56,7 +56,7 @@ class HTMLTextExtractor extends FileTextExtractor
     */
    public function getContent($file)
    {
-        $content = $file instanceof File ? $file->getString() : file_get_contents($file);
+        $content = $file instanceof File ? $file->getString() : file_get_contents($file ?? '');

        // Yes, yes, regex'ing HTML is evil.
        // Since we don't care about well-formedness or markup here, it does the job.
@@ -82,9 +82,9 @@ class HTMLTextExtractor extends FileTextExtractor
                '@</?((frameset)|(frame)|(iframe))@iu',
            ],
            [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', "$0", "$0", "$0", "$0", "$0", "$0", "$0", "$0"],
-            $content
+            $content ?? ''
        );

-        return strip_tags($content);
+        return strip_tags($content ?? '');
    }
 }
--- a/src/Extractor/PDFTextExtractor.php
+++ b/src/Extractor/PDFTextExtractor.php
@@ -33,18 +33,18 @@ class PDFTextExtractor extends FileTextExtractor
    public function isAvailable()
    {
        $bin = $this->bin('pdftotext');
-        return $bin && file_exists($bin) && is_executable($bin);
+        return $bin && file_exists($bin ?? '') && is_executable($bin ?? '');
    }

    public function supportsExtension($extension)
    {
-        return strtolower($extension) === 'pdf';
+        return strtolower($extension ?? '') === 'pdf';
    }

    public function supportsMime($mime)
    {
        return in_array(
-            strtolower($mime),
+            strtolower($mime ?? ''),
            [
                'application/pdf',
                'application/x-pdf',
@@ -72,7 +72,7 @@ class PDFTextExtractor extends FileTextExtractor
        // Find program in each path
        foreach ($locations as $location) {
            $path = "{$location}/{$program}";
-            if (file_exists($path)) {
+            if (file_exists($path ?? '')) {
                return $path;
            }
            if (file_exists($path . '.exe')) {
@@ -86,7 +86,7 @@ class PDFTextExtractor extends FileTextExtractor

    public function getContent($file)
    {
-        if (!$file || (is_string($file) && !file_exists($file))) {
+        if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
            // no file
            return '';
        }
@@ -108,7 +108,7 @@ class PDFTextExtractor extends FileTextExtractor
        }

        $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
-        exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err);
+        exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path ?? '')), $content, $err);

        if ($err) {
            throw new Exception(sprintf(
@@ -141,6 +141,6 @@ class PDFTextExtractor extends FileTextExtractor
            'ﬆ' => 'st'
        ];

-        return str_replace(array_keys($mapping), array_values($mapping), $input);
+        return str_replace(array_keys($mapping ?? []), array_values($mapping ?? []), $input ?? '');
    }
 }
--- a/src/Extractor/SolrCellTextExtractor.php
+++ b/src/Extractor/SolrCellTextExtractor.php
@@ -80,7 +80,7 @@ class SolrCellTextExtractor extends FileTextExtractor
    public function supportsExtension($extension)
    {
        return in_array(
-            strtolower($extension),
+            strtolower($extension ?? ''),
            [
                'pdf', 'doc', 'docx', 'xls', 'xlsx',
                'epub', 'rtf', 'odt', 'fodt', 'ods', 'fods',
@@ -106,12 +106,12 @@ class SolrCellTextExtractor extends FileTextExtractor
     */
    public function getContent($file)
    {
-        if (!$file || (is_string($file) && !file_exists($file))) {
+        if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
            // no file
            return '';
        }

-        $fileName = $file instanceof File ? $file->getFilename() : basename($file);
+        $fileName = $file instanceof File ? $file->getFilename() : basename($file ?? '');
        $client = $this->getHttpClient();

        // Get and validate base URL
@@ -121,7 +121,7 @@ class SolrCellTextExtractor extends FileTextExtractor
        }

        try {
-            $stream = $file instanceof File ? $file->getStream() : fopen($file, 'r');
+            $stream = $file instanceof File ? $file->getStream() : fopen($file ?? '', 'r');
            /** @var Response $response */
            $response = $client
                ->post($baseUrl, [
@@ -154,7 +154,7 @@ class SolrCellTextExtractor extends FileTextExtractor
        $matches = [];
        // Use preg match to avoid SimpleXML running out of memory on large text nodes
        preg_match(
-            sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
+            sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName ?? '')),
            (string)$response->getBody(),
            $matches
        );
--- a/src/Extractor/TikaServerTextExtractor.php
+++ b/src/Extractor/TikaServerTextExtractor.php
@@ -86,7 +86,7 @@ class TikaServerTextExtractor extends FileTextExtractor
    {
        return $this->getServerEndpoint()
            && $this->getClient()->isAvailable()
-            && version_compare($this->getVersion(), '1.7') >= 0;
+            && version_compare($this->getVersion() ?? '', '1.7') >= 0;
    }

    /**
@@ -116,7 +116,7 @@ class TikaServerTextExtractor extends FileTextExtractor

        // Check aliases
        foreach ($this->supportedMimes as $info) {
-            if (isset($info['alias']) && in_array($mime, $info['alias'])) {
+            if (isset($info['alias']) && in_array($mime, $info['alias'] ?? [])) {
                return true;
            }
        }
@@ -130,7 +130,7 @@ class TikaServerTextExtractor extends FileTextExtractor
        $content = $this->getClient()->tika($tempFile);
        //Cleanup temp file
        if ($file instanceof File) {
-            unlink($tempFile);
+            unlink($tempFile ?? '');
        }
        return $content;
    }
--- a/src/Extractor/TikaTextExtractor.php
+++ b/src/Extractor/TikaTextExtractor.php
@@ -29,7 +29,7 @@ class TikaTextExtractor extends FileTextExtractor
        $code = $this->runShell('tika --version', $stdout);

        // Parse output
-        if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout, $matches)) {
+        if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout ?? '', $matches)) {
            return $matches['version'];
        }

@@ -54,14 +54,14 @@ class TikaTextExtractor extends FileTextExtractor
        ];
        // Invoke command
        $pipes = [];
-        $proc = proc_open($command, $descriptorSpecs, $pipes);
+        $proc = proc_open($command ?? '', $descriptorSpecs ?? [], $pipes);

        if (!is_resource($proc)) {
            return 255;
        }

        // Send content as input
-        fwrite($pipes[0], $input);
+        fwrite($pipes[0], $input ?? '');
        fclose($pipes[0]);

        // Get output
@@ -78,11 +78,11 @@ class TikaTextExtractor extends FileTextExtractor
    {
        $mode = $this->config()->get('output_mode');
        $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
-        $command = sprintf('tika %s %s', $mode, escapeshellarg($path));
+        $command = sprintf('tika %s %s', $mode, escapeshellarg($path ?? ''));
        $code = $this->runShell($command, $output);
        //Cleanup temp file
        if ($file instanceof File) {
-            unlink($path);
+            unlink($path ?? '');
        }

        if ($code == 0) {
@@ -123,8 +123,8 @@ class TikaTextExtractor extends FileTextExtractor
        }

        // Check if the mime type is inside the result
-        $pattern = sprintf('/\b(%s)\b/', preg_quote($mime, '/'));
+        $pattern = sprintf('/\b(%s)\b/', preg_quote($mime ?? '', '/'));

-        return (bool)preg_match($pattern, $supportedTypes);
+        return (bool)preg_match($pattern ?? '', $supportedTypes ?? '');
    }
 }
--- a/src/Rest/TikaRestClient.php
+++ b/src/Rest/TikaRestClient.php
@@ -80,7 +80,7 @@ class TikaRestClient extends Client

        // Parse output
        if ($response->getStatusCode() == 200
-            && preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody(), $matches)
+            && preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody() ?? '', $matches)
        ) {
            $version = $matches['version'];
        }
@@ -129,7 +129,7 @@ class TikaRestClient extends Client
                    'headers' => [
                        'Accept' => 'text/plain',
                    ],
-                    'body' => file_get_contents($file),
+                    'body' => file_get_contents($file ?? ''),
                ])
            );
            $text = $response->getBody();