Compare commits

...

2 Commits

Author SHA1 Message Date
Guy Sartorelli
4674084d0d
Merge pull request #72 from creative-commoners/pulls/3/php81
ENH PHP 8.1 compatibility
2022-04-26 17:58:43 +12:00
Steve Boyd
df8b17ab85 ENH PHP 8.1 compatibility 2022-04-13 13:51:04 +12:00
8 changed files with 35 additions and 35 deletions

View File

@ -42,7 +42,7 @@ class Cache implements FileTextCache, Flushable
*/ */
protected function getKey(File $file) protected function getKey(File $file)
{ {
return md5($file->getFilename()); return md5($file->getFilename() ?? '');
} }
/** /**

View File

@ -58,7 +58,7 @@ abstract class FileTextExtractor
arsort($classPriorities); arsort($classPriorities);
// Save classes // Save classes
$sortedClasses = array_keys($classPriorities); $sortedClasses = array_keys($classPriorities ?? []);
return self::$sorted_extractor_classes = $sortedClasses; return self::$sorted_extractor_classes = $sortedClasses;
} }
@ -81,7 +81,7 @@ abstract class FileTextExtractor
*/ */
public static function for_file($file) public static function for_file($file)
{ {
if (!$file || (is_string($file) && !file_exists($file))) { if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
return null; return null;
} }
@ -137,11 +137,11 @@ abstract class FileTextExtractor
} }
// Remove any existing temp files with this name // Remove any existing temp files with this name
if (file_exists($path)) { if (file_exists($path ?? '')) {
unlink($path); unlink($path ?? '');
} }
$bytesWritten = file_put_contents($path, $file->getStream()); $bytesWritten = file_put_contents($path ?? '', $file->getStream());
if (false === $bytesWritten) { if (false === $bytesWritten) {
throw new Exception(static::class . '->getPathFromFile() failed to write temporary file'); throw new Exception(static::class . '->getPathFromFile() failed to write temporary file');
} }

View File

@ -34,7 +34,7 @@ class HTMLTextExtractor extends FileTextExtractor
*/ */
public function supportsExtension($extension) public function supportsExtension($extension)
{ {
return in_array(strtolower($extension), ["html", "htm", "xhtml"]); return in_array(strtolower($extension ?? ''), ["html", "htm", "xhtml"]);
} }
/** /**
@ -43,7 +43,7 @@ class HTMLTextExtractor extends FileTextExtractor
*/ */
public function supportsMime($mime) public function supportsMime($mime)
{ {
return strtolower($mime) === 'text/html'; return strtolower($mime ?? '') === 'text/html';
} }
/** /**
@ -56,7 +56,7 @@ class HTMLTextExtractor extends FileTextExtractor
*/ */
public function getContent($file) public function getContent($file)
{ {
$content = $file instanceof File ? $file->getString() : file_get_contents($file); $content = $file instanceof File ? $file->getString() : file_get_contents($file ?? '');
// Yes, yes, regex'ing HTML is evil. // Yes, yes, regex'ing HTML is evil.
// Since we don't care about well-formedness or markup here, it does the job. // Since we don't care about well-formedness or markup here, it does the job.
@ -82,9 +82,9 @@ class HTMLTextExtractor extends FileTextExtractor
'@</?((frameset)|(frame)|(iframe))@iu', '@</?((frameset)|(frame)|(iframe))@iu',
], ],
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', "$0", "$0", "$0", "$0", "$0", "$0", "$0", "$0"], [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', "$0", "$0", "$0", "$0", "$0", "$0", "$0", "$0"],
$content $content ?? ''
); );
return strip_tags($content); return strip_tags($content ?? '');
} }
} }

View File

@ -33,18 +33,18 @@ class PDFTextExtractor extends FileTextExtractor
public function isAvailable() public function isAvailable()
{ {
$bin = $this->bin('pdftotext'); $bin = $this->bin('pdftotext');
return $bin && file_exists($bin) && is_executable($bin); return $bin && file_exists($bin ?? '') && is_executable($bin ?? '');
} }
public function supportsExtension($extension) public function supportsExtension($extension)
{ {
return strtolower($extension) === 'pdf'; return strtolower($extension ?? '') === 'pdf';
} }
public function supportsMime($mime) public function supportsMime($mime)
{ {
return in_array( return in_array(
strtolower($mime), strtolower($mime ?? ''),
[ [
'application/pdf', 'application/pdf',
'application/x-pdf', 'application/x-pdf',
@ -72,7 +72,7 @@ class PDFTextExtractor extends FileTextExtractor
// Find program in each path // Find program in each path
foreach ($locations as $location) { foreach ($locations as $location) {
$path = "{$location}/{$program}"; $path = "{$location}/{$program}";
if (file_exists($path)) { if (file_exists($path ?? '')) {
return $path; return $path;
} }
if (file_exists($path . '.exe')) { if (file_exists($path . '.exe')) {
@ -86,7 +86,7 @@ class PDFTextExtractor extends FileTextExtractor
public function getContent($file) public function getContent($file)
{ {
if (!$file || (is_string($file) && !file_exists($file))) { if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
// no file // no file
return ''; return '';
} }
@ -108,7 +108,7 @@ class PDFTextExtractor extends FileTextExtractor
} }
$path = $file instanceof File ? $this->getPathFromFile($file) : $file; $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err); exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path ?? '')), $content, $err);
if ($err) { if ($err) {
throw new Exception(sprintf( throw new Exception(sprintf(
@ -141,6 +141,6 @@ class PDFTextExtractor extends FileTextExtractor
'st' => 'st' 'st' => 'st'
]; ];
return str_replace(array_keys($mapping), array_values($mapping), $input); return str_replace(array_keys($mapping ?? []), array_values($mapping ?? []), $input ?? '');
} }
} }

View File

@ -80,7 +80,7 @@ class SolrCellTextExtractor extends FileTextExtractor
public function supportsExtension($extension) public function supportsExtension($extension)
{ {
return in_array( return in_array(
strtolower($extension), strtolower($extension ?? ''),
[ [
'pdf', 'doc', 'docx', 'xls', 'xlsx', 'pdf', 'doc', 'docx', 'xls', 'xlsx',
'epub', 'rtf', 'odt', 'fodt', 'ods', 'fods', 'epub', 'rtf', 'odt', 'fodt', 'ods', 'fods',
@ -106,12 +106,12 @@ class SolrCellTextExtractor extends FileTextExtractor
*/ */
public function getContent($file) public function getContent($file)
{ {
if (!$file || (is_string($file) && !file_exists($file))) { if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
// no file // no file
return ''; return '';
} }
$fileName = $file instanceof File ? $file->getFilename() : basename($file); $fileName = $file instanceof File ? $file->getFilename() : basename($file ?? '');
$client = $this->getHttpClient(); $client = $this->getHttpClient();
// Get and validate base URL // Get and validate base URL
@ -121,7 +121,7 @@ class SolrCellTextExtractor extends FileTextExtractor
} }
try { try {
$stream = $file instanceof File ? $file->getStream() : fopen($file, 'r'); $stream = $file instanceof File ? $file->getStream() : fopen($file ?? '', 'r');
/** @var Response $response */ /** @var Response $response */
$response = $client $response = $client
->post($baseUrl, [ ->post($baseUrl, [
@ -154,7 +154,7 @@ class SolrCellTextExtractor extends FileTextExtractor
$matches = []; $matches = [];
// Use preg match to avoid SimpleXML running out of memory on large text nodes // Use preg match to avoid SimpleXML running out of memory on large text nodes
preg_match( preg_match(
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)), sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName ?? '')),
(string)$response->getBody(), (string)$response->getBody(),
$matches $matches
); );

View File

@ -86,7 +86,7 @@ class TikaServerTextExtractor extends FileTextExtractor
{ {
return $this->getServerEndpoint() return $this->getServerEndpoint()
&& $this->getClient()->isAvailable() && $this->getClient()->isAvailable()
&& version_compare($this->getVersion(), '1.7') >= 0; && version_compare($this->getVersion() ?? '', '1.7') >= 0;
} }
/** /**
@ -116,7 +116,7 @@ class TikaServerTextExtractor extends FileTextExtractor
// Check aliases // Check aliases
foreach ($this->supportedMimes as $info) { foreach ($this->supportedMimes as $info) {
if (isset($info['alias']) && in_array($mime, $info['alias'])) { if (isset($info['alias']) && in_array($mime, $info['alias'] ?? [])) {
return true; return true;
} }
} }
@ -130,7 +130,7 @@ class TikaServerTextExtractor extends FileTextExtractor
$content = $this->getClient()->tika($tempFile); $content = $this->getClient()->tika($tempFile);
//Cleanup temp file //Cleanup temp file
if ($file instanceof File) { if ($file instanceof File) {
unlink($tempFile); unlink($tempFile ?? '');
} }
return $content; return $content;
} }

View File

@ -29,7 +29,7 @@ class TikaTextExtractor extends FileTextExtractor
$code = $this->runShell('tika --version', $stdout); $code = $this->runShell('tika --version', $stdout);
// Parse output // Parse output
if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout, $matches)) { if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout ?? '', $matches)) {
return $matches['version']; return $matches['version'];
} }
@ -54,14 +54,14 @@ class TikaTextExtractor extends FileTextExtractor
]; ];
// Invoke command // Invoke command
$pipes = []; $pipes = [];
$proc = proc_open($command, $descriptorSpecs, $pipes); $proc = proc_open($command ?? '', $descriptorSpecs ?? [], $pipes);
if (!is_resource($proc)) { if (!is_resource($proc)) {
return 255; return 255;
} }
// Send content as input // Send content as input
fwrite($pipes[0], $input); fwrite($pipes[0], $input ?? '');
fclose($pipes[0]); fclose($pipes[0]);
// Get output // Get output
@ -78,11 +78,11 @@ class TikaTextExtractor extends FileTextExtractor
{ {
$mode = $this->config()->get('output_mode'); $mode = $this->config()->get('output_mode');
$path = $file instanceof File ? $this->getPathFromFile($file) : $file; $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
$command = sprintf('tika %s %s', $mode, escapeshellarg($path)); $command = sprintf('tika %s %s', $mode, escapeshellarg($path ?? ''));
$code = $this->runShell($command, $output); $code = $this->runShell($command, $output);
//Cleanup temp file //Cleanup temp file
if ($file instanceof File) { if ($file instanceof File) {
unlink($path); unlink($path ?? '');
} }
if ($code == 0) { if ($code == 0) {
@ -123,8 +123,8 @@ class TikaTextExtractor extends FileTextExtractor
} }
// Check if the mime type is inside the result // Check if the mime type is inside the result
$pattern = sprintf('/\b(%s)\b/', preg_quote($mime, '/')); $pattern = sprintf('/\b(%s)\b/', preg_quote($mime ?? '', '/'));
return (bool)preg_match($pattern, $supportedTypes); return (bool)preg_match($pattern ?? '', $supportedTypes ?? '');
} }
} }

View File

@ -80,7 +80,7 @@ class TikaRestClient extends Client
// Parse output // Parse output
if ($response->getStatusCode() == 200 if ($response->getStatusCode() == 200
&& preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody(), $matches) && preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody() ?? '', $matches)
) { ) {
$version = $matches['version']; $version = $matches['version'];
} }
@ -129,7 +129,7 @@ class TikaRestClient extends Client
'headers' => [ 'headers' => [
'Accept' => 'text/plain', 'Accept' => 'text/plain',
], ],
'body' => file_get_contents($file), 'body' => file_get_contents($file ?? ''),
]) ])
); );
$text = $response->getBody(); $text = $response->getBody();