mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 11:06:00 +02:00
Provide alternative backends for caching of extracted content
Implement Flushable for clearing the cache
This commit is contained in:
parent
98a83a5bca
commit
98fd4228f9
18
README.md
18
README.md
@ -63,6 +63,24 @@ File:
|
||||
- FileTextExtractable
|
||||
```
|
||||
|
||||
By default any extracted content will be cached against the database row.
|
||||
|
||||
Alternatively, extracted content can be cached using SS_Cache to prevent excessive database growth.
|
||||
To swap out the cache backend, use the following YAML configuration.
|
||||
|
||||
|
||||
```yaml
|
||||
---
|
||||
Name: mytextextraction
|
||||
After: '#textextraction'
|
||||
---
|
||||
Injector:
  FileTextCache: FileTextCache_SSCache
FileTextCache_SSCache:
  lifetime: 3600 # Number of seconds to cache content for
|
||||
|
||||
```
|
||||
|
||||
### XPDF
|
||||
|
||||
PDFs require special handling, for example through the [XPDF](http://www.foolabs.com/xpdf/)
|
||||
|
@ -1,2 +1,8 @@
|
||||
---
|
||||
Name: textextraction
|
||||
---
|
||||
Injector:
  FileTextCache: FileTextCache_Database
|
||||
|
||||
SolrCellTextExtractor:
|
||||
# base_url: 'http://localhost:8983/solr/update/extract'
|
||||
# base_url: 'http://localhost:8983/solr/update/extract'
|
||||
|
81
code/extensions/FileTextCache.php
Normal file
81
code/extensions/FileTextCache.php
Normal file
@ -0,0 +1,81 @@
|
||||
<?php
|
||||
|
||||
/**
 * Contract for a storage backend that holds text extracted from File records.
 */
interface FileTextCache {

	/**
	 * Look up previously stored extracted content for the given file.
	 *
	 * @param File $file The file whose cached content is requested
	 * @return mixed The stored content; the exact "nothing cached" value
	 *               depends on the implementing backend
	 */
	public function load(File $file);

	/**
	 * Store extracted content for the given file.
	 *
	 * @param File $file The file the content was extracted from
	 * @param string $content The extracted text to store
	 */
	public function save(File $file, $content);

}
|
||||
|
||||
/**
 * Cache backend that keeps the extracted content on the File record itself,
 * in its FileContentCache field.
 */
class FileTextCache_Database implements FileTextCache {

	/**
	 * Store the content on the record and persist the record.
	 *
	 * @param File $file
	 * @param string $content
	 */
	public function save(File $file, $content) {
		$file->FileContentCache = $content;
		$file->write();
	}

	/**
	 * Read the content back from the record's FileContentCache field.
	 *
	 * @param File $file
	 * @return mixed Whatever the FileContentCache field currently holds
	 */
	public function load(File $file) {
		$cached = $file->FileContentCache;
		return $cached;
	}

}
|
||||
|
||||
/**
 * Uses SS_Cache with a lifetime to cache extracted content
 */
class FileTextCache_SSCache implements FileTextCache, Flushable {

	/**
	 * Default cache to 1 hour
	 *
	 * @var int
	 * @config
	 */
	private static $lifetime = 3600;

	/**
	 * Build the cache object for this class and apply the configured lifetime.
	 *
	 * @return SS_Cache
	 */
	protected static function get_cache() {
		$lifetime = Config::inst()->get(__CLASS__, 'lifetime');
		$cache = SS_Cache::factory(__CLASS__);
		$cache->setLifetime($lifetime);
		return $cache;
	}

	/**
	 * Derive the cache key for a file from the md5 hash of its full path.
	 *
	 * @param File $file
	 * @return string
	 */
	protected function getKey(File $file) {
		// getFullPath must be invoked as a method: reading it as a property
		// does not call it, so the key would not be derived from the path.
		return md5($file->getFullPath());
	}

	/**
	 * Return any cached extracted content for the given file.
	 *
	 * @param File $file
	 * @return mixed The result of the cache object's load() for this file's key
	 */
	public function load(File $file) {
		$key = $this->getKey($file);
		$cache = self::get_cache();
		return $cache->load($key);
	}

	/**
	 * Save extracted content for the given file under its cache key.
	 *
	 * @param File $file
	 * @param string $content
	 * @return mixed The result of the cache object's save()
	 */
	public function save(File $file, $content) {
		$key = $this->getKey($file);
		$cache = self::get_cache();
		return $cache->save($content, $key);
	}

	/**
	 * Flushable hook: calls clean() on this class's cache object.
	 */
	public static function flush() {
		$cache = self::get_cache();
		$cache->clean();
	}

}
|
@ -19,6 +19,30 @@ class FileTextExtractable extends DataExtension {
|
||||
'FileContent' => 'Text'
|
||||
);
|
||||
|
||||
/**
 * Dependency declaration mapping the TextCache property to the
 * FileTextCache service.
 *
 * @var array
 */
private static $dependencies = array(
	'TextCache' => '%$FileTextCache'
);

/**
 * Cache backend for extracted text; null until one is assigned.
 *
 * @var FileTextCache
 */
protected $fileTextCache = null;
|
||||
|
||||
/**
 * Assign the cache backend this extension stores on itself.
 *
 * @param FileTextCache $cache Backend to use from now on
 */
public function setTextCache(FileTextCache $cache) {
	$this->fileTextCache = $cache;
}
|
||||
|
||||
/**
 * Fetch the cache backend currently assigned to this extension.
 *
 * @return FileTextCache The assigned backend (null if none has been set)
 */
public function getTextCache() {
	$cache = $this->fileTextCache;
	return $cache;
}
|
||||
|
||||
/**
|
||||
* Helper function for template
|
||||
*
|
||||
@ -37,17 +61,25 @@ class FileTextExtractable extends DataExtension {
|
||||
* @return string
|
||||
*/
|
||||
public function extractFileAsText($disableCache = false) {
	// Serve previously extracted text from the cache backend unless the
	// caller asked to bypass it.
	if (!$disableCache) {
		$text = $this->getTextCache()->load($this->owner);
		if ($text) {
			return $text;
		}
	}

	// Determine which extractor can process this file.
	$extractor = FileTextExtractor::for_file($this->owner->FullPath);
	if (!$extractor) {
		return null;
	}

	$text = $extractor->getContent($this->owner->FullPath);
	if (!$text) {
		return null;
	}

	// Store through the cache backend rather than writing the
	// FileContentCache field directly, so the backend decides where the
	// text is kept.
	$this->getTextCache()->save($this->owner, $text);

	return $text;
}
|
||||
|
Loading…
Reference in New Issue
Block a user