mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 09:06:00 +00:00
Merge pull request #72 from creative-commoners/pulls/3/php81
ENH PHP 8.1 compatibility
This commit is contained in:
commit
4674084d0d
@ -42,7 +42,7 @@ class Cache implements FileTextCache, Flushable
|
||||
*/
|
||||
protected function getKey(File $file)
|
||||
{
|
||||
return md5($file->getFilename());
|
||||
return md5($file->getFilename() ?? '');
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -58,7 +58,7 @@ abstract class FileTextExtractor
|
||||
arsort($classPriorities);
|
||||
|
||||
// Save classes
|
||||
$sortedClasses = array_keys($classPriorities);
|
||||
$sortedClasses = array_keys($classPriorities ?? []);
|
||||
return self::$sorted_extractor_classes = $sortedClasses;
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ abstract class FileTextExtractor
|
||||
*/
|
||||
public static function for_file($file)
|
||||
{
|
||||
if (!$file || (is_string($file) && !file_exists($file))) {
|
||||
if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -137,11 +137,11 @@ abstract class FileTextExtractor
|
||||
}
|
||||
|
||||
// Remove any existing temp files with this name
|
||||
if (file_exists($path)) {
|
||||
unlink($path);
|
||||
if (file_exists($path ?? '')) {
|
||||
unlink($path ?? '');
|
||||
}
|
||||
|
||||
$bytesWritten = file_put_contents($path, $file->getStream());
|
||||
$bytesWritten = file_put_contents($path ?? '', $file->getStream());
|
||||
if (false === $bytesWritten) {
|
||||
throw new Exception(static::class . '->getPathFromFile() failed to write temporary file');
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ class HTMLTextExtractor extends FileTextExtractor
|
||||
*/
|
||||
public function supportsExtension($extension)
|
||||
{
|
||||
return in_array(strtolower($extension), ["html", "htm", "xhtml"]);
|
||||
return in_array(strtolower($extension ?? ''), ["html", "htm", "xhtml"]);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -43,7 +43,7 @@ class HTMLTextExtractor extends FileTextExtractor
|
||||
*/
|
||||
public function supportsMime($mime)
|
||||
{
|
||||
return strtolower($mime) === 'text/html';
|
||||
return strtolower($mime ?? '') === 'text/html';
|
||||
}
|
||||
|
||||
/**
|
||||
@ -56,7 +56,7 @@ class HTMLTextExtractor extends FileTextExtractor
|
||||
*/
|
||||
public function getContent($file)
|
||||
{
|
||||
$content = $file instanceof File ? $file->getString() : file_get_contents($file);
|
||||
$content = $file instanceof File ? $file->getString() : file_get_contents($file ?? '');
|
||||
|
||||
// Yes, yes, regex'ing HTML is evil.
|
||||
// Since we don't care about well-formedness or markup here, it does the job.
|
||||
@ -82,9 +82,9 @@ class HTMLTextExtractor extends FileTextExtractor
|
||||
'@</?((frameset)|(frame)|(iframe))@iu',
|
||||
],
|
||||
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', "$0", "$0", "$0", "$0", "$0", "$0", "$0", "$0"],
|
||||
$content
|
||||
$content ?? ''
|
||||
);
|
||||
|
||||
return strip_tags($content);
|
||||
return strip_tags($content ?? '');
|
||||
}
|
||||
}
|
||||
|
@ -33,18 +33,18 @@ class PDFTextExtractor extends FileTextExtractor
|
||||
public function isAvailable()
|
||||
{
|
||||
$bin = $this->bin('pdftotext');
|
||||
return $bin && file_exists($bin) && is_executable($bin);
|
||||
return $bin && file_exists($bin ?? '') && is_executable($bin ?? '');
|
||||
}
|
||||
|
||||
public function supportsExtension($extension)
|
||||
{
|
||||
return strtolower($extension) === 'pdf';
|
||||
return strtolower($extension ?? '') === 'pdf';
|
||||
}
|
||||
|
||||
public function supportsMime($mime)
|
||||
{
|
||||
return in_array(
|
||||
strtolower($mime),
|
||||
strtolower($mime ?? ''),
|
||||
[
|
||||
'application/pdf',
|
||||
'application/x-pdf',
|
||||
@ -72,7 +72,7 @@ class PDFTextExtractor extends FileTextExtractor
|
||||
// Find program in each path
|
||||
foreach ($locations as $location) {
|
||||
$path = "{$location}/{$program}";
|
||||
if (file_exists($path)) {
|
||||
if (file_exists($path ?? '')) {
|
||||
return $path;
|
||||
}
|
||||
if (file_exists($path . '.exe')) {
|
||||
@ -86,7 +86,7 @@ class PDFTextExtractor extends FileTextExtractor
|
||||
|
||||
public function getContent($file)
|
||||
{
|
||||
if (!$file || (is_string($file) && !file_exists($file))) {
|
||||
if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
|
||||
// no file
|
||||
return '';
|
||||
}
|
||||
@ -108,7 +108,7 @@ class PDFTextExtractor extends FileTextExtractor
|
||||
}
|
||||
|
||||
$path = $file instanceof File ? $this->getPathFromFile($file) : $file;
|
||||
exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path)), $content, $err);
|
||||
exec(sprintf('%s %s - 2>&1', $this->bin('pdftotext'), escapeshellarg($path ?? '')), $content, $err);
|
||||
|
||||
if ($err) {
|
||||
throw new Exception(sprintf(
|
||||
@ -141,6 +141,6 @@ class PDFTextExtractor extends FileTextExtractor
|
||||
'st' => 'st'
|
||||
];
|
||||
|
||||
return str_replace(array_keys($mapping), array_values($mapping), $input);
|
||||
return str_replace(array_keys($mapping ?? []), array_values($mapping ?? []), $input ?? '');
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ class SolrCellTextExtractor extends FileTextExtractor
|
||||
public function supportsExtension($extension)
|
||||
{
|
||||
return in_array(
|
||||
strtolower($extension),
|
||||
strtolower($extension ?? ''),
|
||||
[
|
||||
'pdf', 'doc', 'docx', 'xls', 'xlsx',
|
||||
'epub', 'rtf', 'odt', 'fodt', 'ods', 'fods',
|
||||
@ -106,12 +106,12 @@ class SolrCellTextExtractor extends FileTextExtractor
|
||||
*/
|
||||
public function getContent($file)
|
||||
{
|
||||
if (!$file || (is_string($file) && !file_exists($file))) {
|
||||
if (!$file || (is_string($file) && !file_exists($file ?? ''))) {
|
||||
// no file
|
||||
return '';
|
||||
}
|
||||
|
||||
$fileName = $file instanceof File ? $file->getFilename() : basename($file);
|
||||
$fileName = $file instanceof File ? $file->getFilename() : basename($file ?? '');
|
||||
$client = $this->getHttpClient();
|
||||
|
||||
// Get and validate base URL
|
||||
@ -121,7 +121,7 @@ class SolrCellTextExtractor extends FileTextExtractor
|
||||
}
|
||||
|
||||
try {
|
||||
$stream = $file instanceof File ? $file->getStream() : fopen($file, 'r');
|
||||
$stream = $file instanceof File ? $file->getStream() : fopen($file ?? '', 'r');
|
||||
/** @var Response $response */
|
||||
$response = $client
|
||||
->post($baseUrl, [
|
||||
@ -154,7 +154,7 @@ class SolrCellTextExtractor extends FileTextExtractor
|
||||
$matches = [];
|
||||
// Use preg match to avoid SimpleXML running out of memory on large text nodes
|
||||
preg_match(
|
||||
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName)),
|
||||
sprintf('/\<str name\="%s"\>(.*?)\<\/str\>/s', preg_quote($fileName ?? '')),
|
||||
(string)$response->getBody(),
|
||||
$matches
|
||||
);
|
||||
|
@ -86,7 +86,7 @@ class TikaServerTextExtractor extends FileTextExtractor
|
||||
{
|
||||
return $this->getServerEndpoint()
|
||||
&& $this->getClient()->isAvailable()
|
||||
&& version_compare($this->getVersion(), '1.7') >= 0;
|
||||
&& version_compare($this->getVersion() ?? '', '1.7') >= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -116,7 +116,7 @@ class TikaServerTextExtractor extends FileTextExtractor
|
||||
|
||||
// Check aliases
|
||||
foreach ($this->supportedMimes as $info) {
|
||||
if (isset($info['alias']) && in_array($mime, $info['alias'])) {
|
||||
if (isset($info['alias']) && in_array($mime, $info['alias'] ?? [])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -130,7 +130,7 @@ class TikaServerTextExtractor extends FileTextExtractor
|
||||
$content = $this->getClient()->tika($tempFile);
|
||||
//Cleanup temp file
|
||||
if ($file instanceof File) {
|
||||
unlink($tempFile);
|
||||
unlink($tempFile ?? '');
|
||||
}
|
||||
return $content;
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ class TikaTextExtractor extends FileTextExtractor
|
||||
$code = $this->runShell('tika --version', $stdout);
|
||||
|
||||
// Parse output
|
||||
if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout, $matches)) {
|
||||
if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout ?? '', $matches)) {
|
||||
return $matches['version'];
|
||||
}
|
||||
|
||||
@ -54,14 +54,14 @@ class TikaTextExtractor extends FileTextExtractor
|
||||
];
|
||||
// Invoke command
|
||||
$pipes = [];
|
||||
$proc = proc_open($command, $descriptorSpecs, $pipes);
|
||||
$proc = proc_open($command ?? '', $descriptorSpecs ?? [], $pipes);
|
||||
|
||||
if (!is_resource($proc)) {
|
||||
return 255;
|
||||
}
|
||||
|
||||
// Send content as input
|
||||
fwrite($pipes[0], $input);
|
||||
fwrite($pipes[0], $input ?? '');
|
||||
fclose($pipes[0]);
|
||||
|
||||
// Get output
|
||||
@ -78,11 +78,11 @@ class TikaTextExtractor extends FileTextExtractor
|
||||
{
|
||||
$mode = $this->config()->get('output_mode');
|
||||
$path = $file instanceof File ? $this->getPathFromFile($file) : $file;
|
||||
$command = sprintf('tika %s %s', $mode, escapeshellarg($path));
|
||||
$command = sprintf('tika %s %s', $mode, escapeshellarg($path ?? ''));
|
||||
$code = $this->runShell($command, $output);
|
||||
//Cleanup temp file
|
||||
if ($file instanceof File) {
|
||||
unlink($path);
|
||||
unlink($path ?? '');
|
||||
}
|
||||
|
||||
if ($code == 0) {
|
||||
@ -123,8 +123,8 @@ class TikaTextExtractor extends FileTextExtractor
|
||||
}
|
||||
|
||||
// Check if the mime type is inside the result
|
||||
$pattern = sprintf('/\b(%s)\b/', preg_quote($mime, '/'));
|
||||
$pattern = sprintf('/\b(%s)\b/', preg_quote($mime ?? '', '/'));
|
||||
|
||||
return (bool)preg_match($pattern, $supportedTypes);
|
||||
return (bool)preg_match($pattern ?? '', $supportedTypes ?? '');
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ class TikaRestClient extends Client
|
||||
|
||||
// Parse output
|
||||
if ($response->getStatusCode() == 200
|
||||
&& preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody(), $matches)
|
||||
&& preg_match('/Apache Tika (?<version>[\.\d]+)/', $response->getBody() ?? '', $matches)
|
||||
) {
|
||||
$version = $matches['version'];
|
||||
}
|
||||
@ -129,7 +129,7 @@ class TikaRestClient extends Client
|
||||
'headers' => [
|
||||
'Accept' => 'text/plain',
|
||||
],
|
||||
'body' => file_get_contents($file),
|
||||
'body' => file_get_contents($file ?? ''),
|
||||
])
|
||||
);
|
||||
$text = $response->getBody();
|
||||
|
Loading…
x
Reference in New Issue
Block a user