get($class, 'priority');
        }
        arsort($classPriorities);

        // Save classes
        $sortedClasses = array_keys($classPriorities);
        return self::$sorted_extractor_classes = $sortedClasses;
    }

    /**
     * Get the text file extractor for the given class
     *
     * @param string $class Name of the extractor class to resolve through the Injector
     * @return FileTextExtractor
     */
    protected static function get_extractor($class)
    {
        return Injector::inst()->get($class);
    }

    /**
     * Attempt to detect mime type for given file
     *
     * @param string $path Path handed to Symfony's File wrapper
     * @return string Mime type if found
     */
    protected static function get_mime($path)
    {
        $file = new \Symfony\Component\HttpFoundation\File\File($path);

        return $file->getMimeType();
    }

    /**
     * Given a File object, decide which extractor instance to use to handle it.
     *
     * Extractor classes are tried in the order returned by get_extractor_classes().
     * For each available extractor the file extension is checked first, then the
     * mime type; the first extractor that claims either one wins.
     *
     * @param File $file
     * @return FileTextExtractor|null The matching extractor, or null if none supports the file
     */
    public static function for_file(File $file)
    {
        if (!$file) {
            return null;
        }

        $extension = $file->getExtension();
        $mime = $file->getMimeType();

        foreach (self::get_extractor_classes() as $className) {
            $extractor = self::get_extractor($className);

            // Skip unavailable extractors
            if (!$extractor->isAvailable()) {
                continue;
            }

            // Check extension
            if ($extension && $extractor->supportsExtension($extension)) {
                return $extractor;
            }

            // Check mime
            if ($mime && $extractor->supportsMime($mime)) {
                return $extractor;
            }
        }

        // No available extractor claimed this file; say so explicitly rather than
        // relying on the implicit null of falling off the end of the function.
        return null;
    }

    /**
     * Some text extractors (like pdftotext) may require a physical file to read from, so write the current
     * file contents to a temp file and return its path
     *
     * @param File $file
     * @return string Path of the temporary file that now holds the file's contents
     * @throws Exception If the temporary file name cannot be allocated or the contents cannot be written
     */
    protected function getPathFromFile(File $file)
    {
        $path = tempnam(TEMP_PATH, 'pdftextextractor_');
        if (false === $path) {
            throw new Exception(static::class . '->getPathFromFile() could not allocate temporary file name');
        }

        // Append extension to temp file if one is set
        $extension = $file->getExtension();
        if ($extension) {
            $path .= '.' . $extension;
        }

        // Remove any existing temp files with this name. Once an extension has been
        // appended the path normally does not exist yet, and unlink() on a missing
        // file raises a warning, so only remove what is actually there.
        if (file_exists($path)) {
            unlink($path);
        }

        $bytesWritten = file_put_contents($path, $file->getStream());
        if (false === $bytesWritten) {
            throw new Exception(static::class . '->getPathFromFile() failed to write temporary file');
        }

        return $path;
    }

    /**
     * Checks if the extractor is supported on the current environment,
     * for example if the correct binaries or libraries are available.
     *
     * @return boolean
     */
    abstract public function isAvailable();

    /**
     * Determine if this extractor supports the given extension.
     * If support is determined by mime/type only, then this should return false.
     *
     * @param string $extension
     * @return boolean
     */
    abstract public function supportsExtension($extension);

    /**
     * Determine if this extractor supports the given mime type.
     * Will only be called if the file has no extension or supportsExtension returns false.
     *
     * @param string $mime
     * @return boolean
     */
    abstract public function supportsMime($mime);

    /**
     * Given a File instance, extract the contents as text.
     *
     * @param File $file
     * @return string
     */
    abstract public function getContent(File $file);
}