40 lines
1002 B
PHP
Raw Normal View History

2012-08-22 17:52:08 +02:00
<?php
/**
 * Text extractor that calls pdftotext to do the conversion.
 *
 * The pdftotext binary is located by bin() and invoked through the shell
 * with "-" as the output argument, so the extracted text is read from the
 * command's standard output.
 *
 * @author mstephens
 */
class PDFTextExtractor extends FileTextExtractor {

	/**
	 * Reports whether the pdftotext binary resolved by bin() exists on disk
	 * and is executable.
	 *
	 * @return boolean
	 */
	public function isAvailable() {
		$bin = $this->bin('pdftotext');
		return (file_exists($bin) && is_executable($bin));
	}

	/**
	 * Lists the file extensions this extractor handles.
	 *
	 * @return array
	 */
	public function supportedExtensions() {
		return array("pdf");
	}

	/**
	 * Accessor to get the location of a binary.
	 *
	 * The directory is chosen in this order: the 'binary_location' static
	 * (when set to a truthy value), then /usr/bin, then /usr/local/bin
	 * (each only if it contains pdftotext), and finally '.'.
	 *
	 * @param string $prog Name of the program, appended to the chosen directory
	 * @return string Directory, a '/' separator, and $prog
	 */
	public function bin($prog = '') {
		// Read the configured location once instead of once per use.
		$configured = $this->stat('binary_location'); // By static from _config.php

		if ($configured) {
			$path = $configured;
		} elseif (file_exists('/usr/bin/pdftotext')) {
			$path = '/usr/bin'; // By searching common directories
		} elseif (file_exists('/usr/local/bin/pdftotext')) {
			$path = '/usr/local/bin';
		} else {
			// Last resort. NOTE: this produces "./<prog>", which is resolved
			// against the current working directory, not looked up in PATH.
			$path = '.';
		}

		// $path is never empty at this point, so the separator is always added.
		return $path . '/' . $prog;
	}

	/**
	 * Runs pdftotext on the given file and returns the text it prints.
	 *
	 * @param string $path Path of the PDF file to convert
	 * @return string Extracted text; "" when $path is empty or when the
	 *                command yields no string output
	 */
	public function getContent($path) {
		if (!$path) return ""; // no file

		// Quote both the binary and the file name so that spaces, quotes,
		// "$" or backticks in either cannot break or inject into the command.
		// NOTE(review): escapeshellarg() is locale-sensitive and may drop
		// multibyte characters under a non-UTF-8 LC_CTYPE - confirm that file
		// names reaching this method are ASCII or that the locale is set.
		$command = escapeshellarg($this->bin('pdftotext')) . ' ' . escapeshellarg($path) . ' -';

		// shell_exec() is the function form of the backtick operator.
		$content = shell_exec($command);

		// Keep the return type consistent with the early "" return above
		// when the command fails or prints nothing.
		return is_string($content) ? $content : "";
	}
}
?>