diff --git a/src/Cache/FileTextCache/Cache.php b/src/Cache/FileTextCache/Cache.php index 44bf13d..b5b2934 100644 --- a/src/Cache/FileTextCache/Cache.php +++ b/src/Cache/FileTextCache/Cache.php @@ -42,7 +42,7 @@ class Cache implements FileTextCache, Flushable */ protected function getKey(File $file) { - return md5($file->getFilename()); + return md5($file->getFilename() ?? ''); } /** diff --git a/src/Extractor/FileTextExtractor.php b/src/Extractor/FileTextExtractor.php index bea066d..d34cac9 100644 --- a/src/Extractor/FileTextExtractor.php +++ b/src/Extractor/FileTextExtractor.php @@ -58,7 +58,7 @@ abstract class FileTextExtractor arsort($classPriorities); // Save classes - $sortedClasses = array_keys($classPriorities); + $sortedClasses = array_keys($classPriorities ?? []); return self::$sorted_extractor_classes = $sortedClasses; } @@ -81,7 +81,7 @@ abstract class FileTextExtractor */ public static function for_file($file) { - if (!$file || (is_string($file) && !file_exists($file))) { + if (!$file || (is_string($file) && !file_exists($file ?? ''))) { return null; } @@ -137,11 +137,11 @@ abstract class FileTextExtractor } // Remove any existing temp files with this name - if (file_exists($path)) { - unlink($path); + if (file_exists($path ?? '')) { + unlink($path ?? ''); } - $bytesWritten = file_put_contents($path, $file->getStream()); + $bytesWritten = file_put_contents($path ?? '', $file->getStream()); if (false === $bytesWritten) { throw new Exception(static::class . '->getPathFromFile() failed to write temporary file'); } diff --git a/src/Extractor/HTMLTextExtractor.php b/src/Extractor/HTMLTextExtractor.php index 4c8db82..c17f36c 100644 --- a/src/Extractor/HTMLTextExtractor.php +++ b/src/Extractor/HTMLTextExtractor.php @@ -34,7 +34,7 @@ class HTMLTextExtractor extends FileTextExtractor */ public function supportsExtension($extension) { - return in_array(strtolower($extension), ["html", "htm", "xhtml"]); + return in_array(strtolower($extension ?? ''), ["html", "htm", "xhtml"]); } /** @@ -43,7 +43,7 @@ class HTMLTextExtractor extends FileTextExtractor */ public function supportsMime($mime) { - return strtolower($mime) === 'text/html'; + return strtolower($mime ?? '') === 'text/html'; } /** @@ -56,7 +56,7 @@ class HTMLTextExtractor extends FileTextExtractor */ public function getContent($file) { - $content = $file instanceof File ? $file->getString() : file_get_contents($file); + $content = $file instanceof File ? $file->getString() : file_get_contents($file ?? ''); // Yes, yes, regex'ing HTML is evil. // Since we don't care about well-formedness or markup here, it does the job. @@ -82,9 +82,9 @@ class HTMLTextExtractor extends FileTextExtractor '@bin('pdftotext'); - return $bin && file_exists($bin) && is_executable($bin); + return $bin && file_exists($bin ?? '') && is_executable($bin ?? ''); } public function supportsExtension($extension) { - return strtolower($extension) === 'pdf'; + return strtolower($extension ?? '') === 'pdf'; } public function supportsMime($mime) { return in_array( - strtolower($mime), + strtolower($mime ?? ''), [ 'application/pdf', 'application/x-pdf', @@ -72,7 +72,7 @@ class PDFTextExtractor extends FileTextExtractor // Find program in each path foreach ($locations as $location) { $path = "{$location}/{$program}"; - if (file_exists($path)) { + if (file_exists($path ?? '')) { return $path; } if (file_exists($path . '.exe')) { @@ -86,7 +86,7 @@ class PDFTextExtractor extends FileTextExtractor public function getContent($file) { - if (!$file || (is_string($file) && !file_exists($file))) { + if (!$file || (is_string($file) && !file_exists($file ?? ''))) { // no file return ''; } @@ -108,7 +108,7 @@ class PDFTextExtractor extends FileTextExtractor } $path = $file instanceof File ? $this->getPathFromFile($file) : $file; - exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err); + exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path ?? '')), $content, $err); if ($err) { throw new Exception(sprintf( @@ -141,6 +141,6 @@ class PDFTextExtractor extends FileTextExtractor 'st' => 'st' ]; - return str_replace(array_keys($mapping), array_values($mapping), $input); + return str_replace(array_keys($mapping ?? []), array_values($mapping ?? []), $input ?? ''); } } diff --git a/src/Extractor/SolrCellTextExtractor.php b/src/Extractor/SolrCellTextExtractor.php index afa3ccf..776ec00 100644 --- a/src/Extractor/SolrCellTextExtractor.php +++ b/src/Extractor/SolrCellTextExtractor.php @@ -80,7 +80,7 @@ class SolrCellTextExtractor extends FileTextExtractor public function supportsExtension($extension) { return in_array( - strtolower($extension), + strtolower($extension ?? ''), [ 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'epub', 'rtf', 'odt', 'fodt', 'ods', 'fods', @@ -106,12 +106,12 @@ class SolrCellTextExtractor extends FileTextExtractor */ public function getContent($file) { - if (!$file || (is_string($file) && !file_exists($file))) { + if (!$file || (is_string($file) && !file_exists($file ?? ''))) { // no file return ''; } - $fileName = $file instanceof File ? $file->getFilename() : basename($file); + $fileName = $file instanceof File ? $file->getFilename() : basename($file ?? ''); $client = $this->getHttpClient(); // Get and validate base URL @@ -121,7 +121,7 @@ class SolrCellTextExtractor extends FileTextExtractor } try { - $stream = $file instanceof File ? $file->getStream() : fopen($file, 'r'); + $stream = $file instanceof File ? $file->getStream() : fopen($file ?? '', 'r'); /** @var Response $response */ $response = $client ->post($baseUrl, [ @@ -154,7 +154,7 @@ class SolrCellTextExtractor extends FileTextExtractor $matches = []; // Use preg match to avoid SimpleXML running out of memory on large text nodes preg_match( - sprintf('/\(.*?)\<\/str\>/s', preg_quote($fileName)), + sprintf('/\(.*?)\<\/str\>/s', preg_quote($fileName ?? '')), (string)$response->getBody(), $matches ); diff --git a/src/Extractor/TikaServerTextExtractor.php b/src/Extractor/TikaServerTextExtractor.php index 28ca93a..2d219f7 100644 --- a/src/Extractor/TikaServerTextExtractor.php +++ b/src/Extractor/TikaServerTextExtractor.php @@ -86,7 +86,7 @@ class TikaServerTextExtractor extends FileTextExtractor { return $this->getServerEndpoint() && $this->getClient()->isAvailable() - && version_compare($this->getVersion(), '1.7') >= 0; + && version_compare($this->getVersion() ?? '', '1.7') >= 0; } /** @@ -116,7 +116,7 @@ class TikaServerTextExtractor extends FileTextExtractor // Check aliases foreach ($this->supportedMimes as $info) { - if (isset($info['alias']) && in_array($mime, $info['alias'])) { + if (isset($info['alias']) && in_array($mime, $info['alias'] ?? [])) { return true; } } @@ -130,7 +130,7 @@ class TikaServerTextExtractor extends FileTextExtractor $content = $this->getClient()->tika($tempFile); //Cleanup temp file if ($file instanceof File) { - unlink($tempFile); + unlink($tempFile ?? ''); } return $content; } diff --git a/src/Extractor/TikaTextExtractor.php b/src/Extractor/TikaTextExtractor.php index 5568351..92455fe 100644 --- a/src/Extractor/TikaTextExtractor.php +++ b/src/Extractor/TikaTextExtractor.php @@ -29,7 +29,7 @@ class TikaTextExtractor extends FileTextExtractor $code = $this->runShell('tika --version', $stdout); // Parse output - if (!$code && preg_match('/Apache Tika (?[\.\d]+)/', $stdout, $matches)) { + if (!$code && preg_match('/Apache Tika (?[\.\d]+)/', $stdout ?? '', $matches)) { return $matches['version']; } @@ -54,14 +54,14 @@ class TikaTextExtractor extends FileTextExtractor ]; // Invoke command $pipes = []; - $proc = proc_open($command, $descriptorSpecs, $pipes); + $proc = proc_open($command ?? '', $descriptorSpecs ?? [], $pipes); if (!is_resource($proc)) { return 255; } // Send content as input - fwrite($pipes[0], $input); + fwrite($pipes[0], $input ?? ''); fclose($pipes[0]); // Get output @@ -78,11 +78,11 @@ class TikaTextExtractor extends FileTextExtractor { $mode = $this->config()->get('output_mode'); $path = $file instanceof File ? $this->getPathFromFile($file) : $file; - $command = sprintf('tika %s %s', $mode, escapeshellarg($path)); + $command = sprintf('tika %s %s', $mode, escapeshellarg($path ?? '')); $code = $this->runShell($command, $output); //Cleanup temp file if ($file instanceof File) { - unlink($path); + unlink($path ?? ''); } if ($code == 0) { @@ -123,8 +123,8 @@ class TikaTextExtractor extends FileTextExtractor } // Check if the mime type is inside the result - $pattern = sprintf('/\b(%s)\b/', preg_quote($mime, '/')); + $pattern = sprintf('/\b(%s)\b/', preg_quote($mime ?? '', '/')); - return (bool)preg_match($pattern, $supportedTypes); + return (bool)preg_match($pattern ?? '', $supportedTypes ?? ''); } } diff --git a/src/Rest/TikaRestClient.php b/src/Rest/TikaRestClient.php index 5e021f7..c8ee574 100644 --- a/src/Rest/TikaRestClient.php +++ b/src/Rest/TikaRestClient.php @@ -80,7 +80,7 @@ class TikaRestClient extends Client // Parse output if ($response->getStatusCode() == 200 - && preg_match('/Apache Tika (?[\.\d]+)/', $response->getBody(), $matches) + && preg_match('/Apache Tika (?[\.\d]+)/', $response->getBody() ?? '', $matches) ) { $version = $matches['version']; } @@ -129,7 +129,7 @@ class TikaRestClient extends Client 'headers' => [ 'Accept' => 'text/plain', ], - 'body' => file_get_contents($file), + 'body' => file_get_contents($file ?? ''), ]) ); $text = $response->getBody();