mirror of
https://github.com/silverstripe/silverstripe-textextraction
synced 2024-10-22 11:06:00 +02:00
Provide alternative backends for caching of extracted content
Implement Flushable for clearing the cache
This commit is contained in:
parent
98a83a5bca
commit
98fd4228f9
18
README.md
18
README.md
@ -63,6 +63,24 @@ File:
|
|||||||
- FileTextExtractable
|
- FileTextExtractable
|
||||||
```
|
```
|
||||||
|
|
||||||
|
By default any extracted content will be cached against the database row.
|
||||||
|
|
||||||
|
Alternatively, extracted content can be cached using SS_Cache to prevent excessive database growth.
|
||||||
|
To swap out the cache backend, use the following YAML configuration.
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
Name: mytextextraction
|
||||||
|
After: '#textextraction'
|
||||||
|
---
|
||||||
|
Injector:
|
||||||
|
FileTextCache: FileTextCache_SSCache
|
||||||
|
FileTextCache_SSCache:
|
||||||
|
lifetime: 3600 # Number of seconds to cache content for
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
### XPDF
|
### XPDF
|
||||||
|
|
||||||
PDFs require special handling, for example through the [XPDF](http://www.foolabs.com/xpdf/)
|
PDFs require special handling, for example through the [XPDF](http://www.foolabs.com/xpdf/)
|
||||||
|
@ -1,2 +1,8 @@
|
|||||||
|
---
|
||||||
|
Name: textextraction
|
||||||
|
---
|
||||||
|
Injector:
|
||||||
|
FileTextCache: FileTextCache_Database
|
||||||
|
|
||||||
SolrCellTextExtractor:
|
SolrCellTextExtractor:
|
||||||
# base_url: 'http://localhost:8983/solr/update/extract'
|
# base_url: 'http://localhost:8983/solr/update/extract'
|
||||||
|
81
code/extensions/FileTextCache.php
Normal file
81
code/extensions/FileTextCache.php
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
interface FileTextCache {

    /**
     * Look up previously extracted text for the given file.
     *
     * @param File $file File whose cached text is requested
     * @return mixed The cached content; what is returned when nothing is
     *               cached is decided by the implementing backend
     */
    public function load(File $file);

    /**
     * Store extracted text for the given file so later calls to load()
     * can return it.
     *
     * @param File $file File the text was extracted from
     * @param string $content Extracted text to cache
     */
    public function save(File $file, $content);

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caches the extracted content on the record for the file
|
||||||
|
*/
|
||||||
|
class FileTextCache_Database implements FileTextCache {

    /**
     * Persist the extracted text on the file record itself.
     *
     * @param File $file Record to update
     * @param string $content Extracted text to store in FileContentCache
     */
    public function save(File $file, $content) {
        $file->FileContentCache = $content;
        // The value lives on the record, so it only becomes durable once written.
        $file->write();
    }

    /**
     * Read the cached text straight from the file record.
     *
     * @param File $file Record to read from
     * @return mixed Current value of the record's FileContentCache field
     */
    public function load(File $file) {
        $cached = $file->FileContentCache;
        return $cached;
    }

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses SS_Cache with a lifetime to cache extracted content
|
||||||
|
*/
|
||||||
|
class FileTextCache_SSCache implements FileTextCache, Flushable {

    /**
     * Lifetime of cached content in seconds. Defaults to 1 hour.
     *
     * @var int
     * @config
     */
    private static $lifetime = 3600;

    /**
     * Build the cache object used by this backend, with the configured
     * lifetime applied.
     *
     * @return Zend_Cache_Core Cache object from SS_Cache::factory()
     *         (NOTE(review): concrete type taken from the SilverStripe 3 API docs - confirm)
     */
    protected static function get_cache() {
        $lifetime = Config::inst()->get(__CLASS__, 'lifetime');
        $cache = SS_Cache::factory(__CLASS__);
        $cache->setLifetime($lifetime);
        return $cache;
    }

    /**
     * Derive the cache key for a file from its full path.
     *
     * @param File $file
     * @return string md5 hash of the file's full path
     */
    protected function getKey(File $file) {
        // getFullPath must be *called*. Reading it as a property does not
        // resolve to the path, so every file would hash to the same key and
        // all files would share a single cache entry.
        return md5($file->getFullPath());
    }

    /**
     * Return any cached extracted content for the given file.
     *
     * @param File $file
     * @return mixed Whatever the cache's load() yields for this file's key
     */
    public function load(File $file) {
        $key = $this->getKey($file);
        $cache = self::get_cache();
        return $cache->load($key);
    }

    /**
     * Cache extracted content for the given file.
     *
     * @param File $file
     * @param string $content Extracted text to cache
     * @return mixed Result of the cache's save() call
     */
    public function save(File $file, $content) {
        $key = $this->getKey($file);
        $cache = self::get_cache();
        return $cache->save($content, $key);
    }

    /**
     * Flushable hook: cleans this backend's cache.
     */
    public static function flush() {
        $cache = self::get_cache();
        $cache->clean();
    }

}
|
@ -19,6 +19,30 @@ class FileTextExtractable extends DataExtension {
|
|||||||
'FileContent' => 'Text'
|
'FileContent' => 'Text'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
private static $dependencies = array(
|
||||||
|
'TextCache' => '%$FileTextCache'
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var FileTextCache
|
||||||
|
*/
|
||||||
|
protected $fileTextCache = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param FileTextCache $cache
|
||||||
|
*/
|
||||||
|
public function setTextCache(FileTextCache $cache) {
    // Store the supplied cache backend; getTextCache() hands it back out.
    $this->fileTextCache = $cache;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return FileTextCache
|
||||||
|
*/
|
||||||
|
public function getTextCache() {
    // Returns the backend stored by setTextCache(); null if none has been set.
    $cache = $this->fileTextCache;
    return $cache;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper function for template
|
* Helper function for template
|
||||||
*
|
*
|
||||||
@ -37,17 +61,25 @@ class FileTextExtractable extends DataExtension {
|
|||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
public function extractFileAsText($disableCache = false) {
|
public function extractFileAsText($disableCache = false) {
|
||||||
if (!$disableCache && $this->owner->FileContentCache) return $this->owner->FileContentCache;
|
if (!$disableCache) {
|
||||||
|
$text = $this->getTextCache()->load($this->owner);
|
||||||
|
if($text) {
|
||||||
|
return $text;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Determine which extractor can process this file.
|
// Determine which extractor can process this file.
|
||||||
$extractor = FileTextExtractor::for_file($this->owner->FullPath);
|
$extractor = FileTextExtractor::for_file($this->owner->FullPath);
|
||||||
if (!$extractor) return null;
|
if (!$extractor) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
$text = $extractor->getContent($this->owner->FullPath);
|
$text = $extractor->getContent($this->owner->FullPath);
|
||||||
if (!$text) return null;
|
if (!$text) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
$this->owner->FileContentCache = $text;
|
$this->getTextCache()->save($this->owner, $text);
|
||||||
$this->owner->write();
|
|
||||||
|
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user