get($class, 'priority');
        }
        arsort($classPriorities);

        // Save classes
        $sortedClasses = array_keys($classPriorities);
        return self::$sorted_extractor_classes = $sortedClasses;
    }

    /**
     * Get the text file extractor for the given class.
     *
     * The instance is obtained from the Injector rather than constructed directly.
     *
     * @param string $class
     * @return FileTextExtractor
     */
    protected static function get_extractor($class) {
        return Injector::inst()->get($class);
    }

    /**
     * Attempt to detect mime type for given file
     *
     * @param string $path
     * @return string|false|null Mime type if found. Null when the finfo class is
     *         not available; otherwise whatever finfo::file() returns, which per
     *         the PHP manual is false on failure.
     */
    protected static function get_mime($path) {
        if(!class_exists('finfo')) return null;

        // Check mime of file
        $finfo = new finfo(FILEINFO_MIME_TYPE);
        return $finfo->file($path);
    }

    /**
     * Find the first available extractor that supports the given file.
     *
     * Extractor classes are tried in the order returned by
     * get_extractor_classes(). For each available extractor the file extension
     * is checked first, then the detected mime type.
     *
     * @param string $path
     * @return FileTextExtractor|null The matching extractor, or null if the path
     *         is not an existing file or no available extractor supports it.
     */
    public static function for_file($path) {
        // Nothing can be extracted from a missing path or a directory, so bail
        // out before asking finfo to inspect something it cannot open.
        if(!file_exists($path) || is_dir($path)) {
            return null;
        }

        $extension = pathinfo($path, PATHINFO_EXTENSION);
        $mime = self::get_mime($path);
        foreach(self::get_extractor_classes() as $className) {
            $extractor = self::get_extractor($className);

            // Skip unavailable extractors
            if(!$extractor->isAvailable()) continue;

            // Check extension
            if($extension && $extractor->supportsExtension($extension)) {
                return $extractor;
            }

            // Check mime
            if($mime && $extractor->supportsMime($mime)) {
                return $extractor;
            }
        }

        // No available extractor supports this file
        return null;
    }

    /**
     * Checks if the extractor is supported on the current environment,
     * for example if the correct binaries or libraries are available.
     *
     * @return boolean
     */
    abstract public function isAvailable();

    /**
     * Determine if this extractor supports the given extension.
     * If support is determined by mime/type only, then this should return false.
     *
     * @param string $extension
     * @return boolean
     */
    abstract public function supportsExtension($extension);

    /**
     * Determine if this extractor supports the given mime type.
     * Will only be called if the file has no extension or supportsExtension
     * returns false.
     *
     * @param string $mime
     * @return boolean
     */
    abstract public function supportsMime($mime);

    /**
     * Given a file path, extract the contents as text.
     *
     * @param string $path
     * @return string
     */
    abstract public function getContent($path);
}

/**
 * Exception type for the file text extraction classes.
 */
class FileTextExtractor_Exception extends Exception {}