From 8aca06aef2eea737215400af4faa59371eb28c4d Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Thu, 7 May 2015 19:14:02 +1200 Subject: [PATCH] Truncate FileContentCache by default to avoid SQL query errors MySQL has a packet limit of 1MB as a default (http://dev.mysql.com/doc/refman/5.0/en/packet-too-large.html). This interferes with the UPDATE queries required to add file content caches. Since the query can't be terminated correctly, the whole content will be discarded with a query error. This change allows content to be truncated prior to the UPDATE operation, and defaults to a limit of 500000 characters. This leaves some room for multibyte characters as well as other parts of the SQL query. --- README.md | 4 ++++ _config/config.yml | 3 +++ code/extensions/FileTextCache.php | 6 ++++-- tests/FileTextCacheDatabaseTest.php | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/FileTextCacheDatabaseTest.php diff --git a/README.md b/README.md index f943ef6..ceb08eb 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,10 @@ File: ``` By default any extracted content will be cached against the database row. +In order to stay within common size constraints for SQL queries required in this operation, +the cache sets a maximum character length after which content gets truncated (default: 500000). +You can configure this value through `FileTextCache_Database.max_content_length` in your yaml configuration. + Alternatively, extracted content can be cached using SS_Cache to prevent excessive database growth. In order to swap out the cache backend you can use the following yaml configuration. 
diff --git a/_config/config.yml b/_config/config.yml index 4073835..be5f178 100644 --- a/_config/config.yml +++ b/_config/config.yml @@ -6,3 +6,6 @@ Injector: SolrCellTextExtractor: # base_url: 'http://localhost:8983/solr/update/extract' + +FileTextCache_Database: + max_content_length: 500000 \ No newline at end of file diff --git a/code/extensions/FileTextCache.php b/code/extensions/FileTextCache.php index cbc0750..80507ae 100644 --- a/code/extensions/FileTextCache.php +++ b/code/extensions/FileTextCache.php @@ -19,7 +19,8 @@ interface FileTextCache { } /** - * Caches the extracted content on the record for the file + * Caches the extracted content on the record for the file. + * Limits the stored file content by default to avoid hitting query size limits. */ class FileTextCache_Database implements FileTextCache { @@ -28,7 +29,8 @@ class FileTextCache_Database implements FileTextCache { } public function save(File $file, $content) { - $file->FileContentCache = $content; + $maxLength = Config::inst()->get('FileTextCache_Database', 'max_content_length'); + $file->FileContentCache = ($maxLength) ? substr($content, 0, $maxLength) : $content; $file->write(); } diff --git a/tests/FileTextCacheDatabaseTest.php b/tests/FileTextCacheDatabaseTest.php new file mode 100644 index 0000000..d0caf60 --- /dev/null +++ b/tests/FileTextCacheDatabaseTest.php @@ -0,0 +1,17 @@ +update('FileTextCache_Database', 'max_content_length', 5); + $cache = new FileTextCache_Database(); + $file = $this->getMock('File', array('write')); + $content = '0123456789'; + $cache->save($file, $content); + $this->assertEquals($cache->load($file), '01234'); + + Config::unnest(); + } + +} \ No newline at end of file