From da0ac49d5f8d2cc5147414c8e0c9adac945790ac Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Fri, 26 Aug 2011 17:57:05 +0200 Subject: [PATCH] API CHANGE Rewriting underscores to dashes in files uploaded through Upload->load(), Folder->addUploadToFolder() or Image->loadUploadedImage(). Transliterating non-ASCII characters automatically (turn off via FileNameFilter::$default_use_transliterator=false) ENHANCEMENT New FileNameFilter class for a more customisable way to influence filename filtering in Upload->load(), Folder->addUploadToFolder() or Image->loadUploadedImage() --- filesystem/File.php | 9 +- filesystem/FileNameFilter.php | 119 ++++++++++++++++++++++++ filesystem/Folder.php | 8 +- filesystem/Upload.php | 9 +- model/Image.php | 15 +-- tests/filesystem/FileNameFilterTest.php | 54 +++++++++++ tests/filesystem/UploadTest.php | 30 +++--- 7 files changed, 207 insertions(+), 37 deletions(-) create mode 100644 filesystem/FileNameFilter.php create mode 100644 tests/filesystem/FileNameFilterTest.php diff --git a/filesystem/File.php b/filesystem/File.php index fb4dd4f34..8ee575584 100644 --- a/filesystem/File.php +++ b/filesystem/File.php @@ -437,13 +437,8 @@ class File extends DataObject { if(!$name) $name = $this->Title; // Fix illegal characters - $name = ereg_replace(' +','-',trim($name)); // Replace any spaces - $name = ereg_replace('[^A-Za-z0-9.+_\-]','',$name); // Replace non alphanumeric characters - - // Remove all leading dots or underscores - while(!empty($name) && ($name[0] == '_' || $name[0] == '.')) { - $name = substr($name, 1); - } + $filter = Object::create('FileNameFilter'); + $name = $filter->filter($name); // We might have just turned it blank, so check again. if(!$name) $name = 'new-folder'; diff --git a/filesystem/FileNameFilter.php b/filesystem/FileNameFilter.php new file mode 100644 index 000000000..66627e050 --- /dev/null +++ b/filesystem/FileNameFilter.php @@ -0,0 +1,119 @@ + + * FileNameFilter::$default_use_transliterator = false; + * FileNameFilter::$default_replacements = array(); + * + */ +class FileNameFilter { + + /** + * @var Boolean + */ + static $default_use_transliterator = true; + + /** + * @var Array See {@link setReplacements()}. + */ + static $default_replacements = array( + '/\s/' => '-', // remove whitespace + '/_/' => '-', // underscores to dashes + '/[^A-Za-z0-9+.-]+/' => '', // remove non-ASCII chars, only allow alphanumeric plus dash and dot + '/[\-]{2,}/' => '-', // remove duplicate dashes + '/^[\.\-_]/' => '', // Remove all leading dots, dashes or underscores + ); + + /** + * @var Array See {@link setReplacements()} + */ + public $replacements = array(); + + /** + * Depending on the applied replacement rules, this method + * might result in an empty string. In this case, {@link getDefaultName()} + * will be used to return a randomly generated file name, while retaining its extension. + * + * @param String Filename including extension (not path). + * @return String A filtered filename + */ + function filter($name) { + $ext = pathinfo($name, PATHINFO_EXTENSION); + + $transliterator = $this->getTransliterator(); + if($transliterator) $name = $transliterator->toASCII($name); + foreach($this->getReplacements() as $regex => $replace) { + $name = preg_replace($regex, $replace, $name); + } + + // Safeguard against empty file names + $nameWithoutExt = pathinfo($name, PATHINFO_FILENAME); + if(empty($nameWithoutExt)) $name = $this->getDefaultName() . '.' . $ext; + + return $name; + } + + /** + * Take care not to add replacements which might invalidate the file structure, + * e.g. removing dots will remove file extension information. + * + * @param Array Map of find/replace used for preg_replace(). + */ + function setReplacements($r) { + $this->replacements = $r; + } + + /** + * @return Array + */ + function getReplacements() { + return ($this->replacements) ? $this->replacements : self::$default_replacements; + } + + /** + * @var Transliterator + */ + protected $transliterator; + + /** + * @return Transliterator|NULL + */ + function getTransliterator() { + if(!$this->transliterator === null && self::$default_use_transliterator) { + $this->transliterator = Object::create('Transliterator'); + } + return $this->transliterator; + } + + /** + * @param Transliterator|FALSE + */ + function setTransliterator($t) { + $this->transliterator = $t; + } + + /** + * @return String File name without extension + */ + function getDefaultName() { + return (string)uniqid(); + } +} \ No newline at end of file diff --git a/filesystem/Folder.php b/filesystem/Folder.php index 5b50c99d1..3e074893f 100644 --- a/filesystem/Folder.php +++ b/filesystem/Folder.php @@ -203,6 +203,8 @@ class Folder extends File { /** * Take a file uploaded via a POST form, and save it inside this folder. + * File names are filtered through {@link FileNameFilter}, see class documentation + * on how to influence this behaviour. */ function addUploadToFolder($tmpFile) { if(!is_array($tmpFile)) { @@ -216,10 +218,8 @@ class Folder extends File { // $parentFolder = Folder::findOrMake("Uploads"); // Generate default filename - $file = str_replace(' ', '-',$tmpFile['name']); - $file = ereg_replace('[^A-Za-z0-9+.-]+','',$file); - $file = ereg_replace('-+', '-',$file); - + $nameFilter = Object::create('FileNameFilter'); + $file = $nameFilter->filter($tmpFile['name']); while($file[0] == '_' || $file[0] == '.') { $file = substr($file, 1); } diff --git a/filesystem/Upload.php b/filesystem/Upload.php index 137e7079d..9fb13dd17 100644 --- a/filesystem/Upload.php +++ b/filesystem/Upload.php @@ -88,6 +88,8 @@ class Upload extends Controller { /** * Save an file passed from a form post into this object. + * File names are filtered through {@link FileNameFilter}, see class documentation + * on how to influence this behaviour. * * @param $tmpFile array Indexed array that PHP generated for every file it uploads. * @param $folderPath string Folder path relative to /assets @@ -129,10 +131,9 @@ class Upload extends Controller { } // Generate default filename - $fileName = str_replace(' ', '-',$tmpFile['name']); - $fileName = ereg_replace('[^A-Za-z0-9+.-]+','',$fileName); - $fileName = ereg_replace('-+', '-',$fileName); - $fileName = basename($fileName); + $nameFilter = Object::create('FileNameFilter'); + $file = $nameFilter->filter($tmpFile['name']); + $fileName = basename($file); $relativeFilePath = ASSETS_DIR . "/" . $folderPath . "/$fileName"; diff --git a/model/Image.php b/model/Image.php index 6df8a5203..9740bdedd 100644 --- a/model/Image.php +++ b/model/Image.php @@ -113,6 +113,10 @@ class Image extends File { return $this->getTag(); } + /** + * File names are filtered through {@link FileNameFilter}, see class documentation + * on how to influence this behaviour. + */ function loadUploadedImage($tmpFile) { if(!is_array($tmpFile)) { user_error("Image::loadUploadedImage() Not passed an array. Most likely, the form hasn't got the right enctype", E_USER_ERROR); @@ -134,12 +138,9 @@ class Image extends File { } // Generate default filename - $file = str_replace(' ', '-',$tmpFile['name']); - $file = ereg_replace('[^A-Za-z0-9+.-]+','',$file); - $file = ereg_replace('-+', '-',$file); - if(!$file) { - $file = "file.jpg"; - } + $nameFilter = Object::create('FileNameFilter'); + $file = $nameFilter->filter($tmpFile['name']); + if(!$file) $file = "file.jpg"; $file = ASSETS_PATH . "/$class/$file"; @@ -480,4 +481,4 @@ class Image_Cached extends Image { public function write($showDebug = false, $forceInsert = false, $forceWrite = false, $writeComponents = false) { throw new Exception("{$this->ClassName} can not be written back to the database."); } -} \ No newline at end of file +} diff --git a/tests/filesystem/FileNameFilterTest.php b/tests/filesystem/FileNameFilterTest.php new file mode 100644 index 000000000..5a4039a69 --- /dev/null +++ b/tests/filesystem/FileNameFilterTest.php @@ -0,0 +1,54 @@ +assertEquals( + 'Brtchen-fr-all-mit-Unterstrich.jpg', + $filter->filter($name) + ); + } + + function testFilterWithTransliterator() { + $name = 'Brötchen für allë-mit_Unterstrich!.jpg'; + $filter = new FileNameFilter(); + $filter->setTransliterator(Object::create('Transliterator')); + $this->assertEquals( + 'Broetchen-fuer-alle-mit-Unterstrich.jpg', + $filter->filter($name) + ); + } + + function testFilterWithCustomRules() { + $name = 'Brötchen für allë-mit_Unterstrich!.jpg'; + $filter = new FileNameFilter(); + $filter->setReplacements(array('/[\s-]/' => '_')); + $this->assertEquals( + 'Brötchen__für_allë_mit_Unterstrich!.jpg', + $filter->filter($name) + ); + } + + function testFilterWithEmptyString() { + $name = 'ö ö ö.jpg'; + $filter = new FileNameFilter(); + $result = $filter->filter($name); + $this->assertFalse( + empty($result) + ); + $this->assertStringEndsWith( + '.jpg', + $result + ); + $this->assertGreaterThan( + strlen('.jpg'), + strlen($result) + ); + } + +} \ No newline at end of file diff --git a/tests/filesystem/UploadTest.php b/tests/filesystem/UploadTest.php index 7782308af..23ef64213 100644 --- a/tests/filesystem/UploadTest.php +++ b/tests/filesystem/UploadTest.php @@ -8,7 +8,7 @@ class UploadTest extends SapphireTest { function testUpload() { // create tmp file - $tmpFileName = 'UploadTest_testUpload.txt'; + $tmpFileName = 'UploadTest-testUpload.txt'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -42,7 +42,7 @@ class UploadTest extends SapphireTest { $file1->delete(); // test upload into custom folder - $customFolder = 'UploadTest_testUpload'; + $customFolder = 'UploadTest-testUpload'; $u2 = new Upload(); $u2->load($tmpFile, $customFolder); $file2 = $u2->getFile(); @@ -62,7 +62,7 @@ class UploadTest extends SapphireTest { function testAllowedFilesize() { // create tmp file - $tmpFileName = 'UploadTest_testUpload.txt'; + $tmpFileName = 'UploadTest-testUpload.txt'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -91,7 +91,7 @@ class UploadTest extends SapphireTest { function testAllowedSizeOnFileWithNoExtension() { // create tmp file - $tmpFileName = 'UploadTest_testUpload'; + $tmpFileName = 'UploadTest-testUpload'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -120,7 +120,7 @@ class UploadTest extends SapphireTest { function testUploadDoesNotAllowUnknownExtension() { // create tmp file - $tmpFileName = 'UploadTest_testUpload.php'; + $tmpFileName = 'UploadTest-testUpload.php'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -149,7 +149,7 @@ class UploadTest extends SapphireTest { function testUploadAcceptsAllowedExtension() { // create tmp file - $tmpFileName = 'UploadTest_testUpload.txt'; + $tmpFileName = 'UploadTest-testUpload.txt'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -182,7 +182,7 @@ class UploadTest extends SapphireTest { function testUploadDeniesNoExtensionFilesIfNoEmptyStringSetForValidatorExtensions() { // create tmp file - $tmpFileName = 'UploadTest_testUpload'; + $tmpFileName = 'UploadTest-testUpload'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -224,7 +224,7 @@ class UploadTest extends SapphireTest { function testUploadTarGzFileTwiceAppendsNumber() { // create tmp file - $tmpFileName = 'UploadTest_testUpload.tar.gz'; + $tmpFileName = 'UploadTest-testUpload.tar.gz'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -241,14 +241,14 @@ class UploadTest extends SapphireTest { ); // Make sure there are none here, otherwise they get renamed incorrectly for the test. - $this->deleteTestUploadFiles("/UploadTesttestUpload.*tar\.gz/"); + $this->deleteTestUploadFiles("/UploadTest-testUpload.*tar\.gz/"); // test upload into default folder $u = new Upload(); $u->load($tmpFile); $file = $u->getFile(); $this->assertEquals( - 'UploadTesttestUpload.tar.gz', + 'UploadTest-testUpload.tar.gz', $file->Name, 'File has a name without a number because it\'s not a duplicate' ); @@ -257,7 +257,7 @@ class UploadTest extends SapphireTest { $u->load($tmpFile); $file2 = $u->getFile(); $this->assertEquals( - 'UploadTesttestUpload2.tar.gz', + 'UploadTest-testUpload2.tar.gz', $file2->Name, 'File receives a number attached to the end before the extension' ); @@ -268,7 +268,7 @@ class UploadTest extends SapphireTest { function testUploadFileWithNoExtensionTwiceAppendsNumber() { // create tmp file - $tmpFileName = 'UploadTest_testUpload'; + $tmpFileName = 'UploadTest-testUpload'; $tmpFilePath = TEMP_FOLDER . '/' . $tmpFileName; $tmpFileContent = ''; for($i=0; $i<10000; $i++) $tmpFileContent .= '0'; @@ -285,7 +285,7 @@ class UploadTest extends SapphireTest { ); // Make sure there are none here, otherwise they get renamed incorrectly for the test. - $this->deleteTestUploadFiles("/UploadTesttestUpload.*/"); + $this->deleteTestUploadFiles("/UploadTest-testUpload.*/"); $v = new UploadTest_Validator(); $v->setAllowedExtensions(array('')); @@ -297,7 +297,7 @@ class UploadTest extends SapphireTest { $file = $u->getFile(); $this->assertEquals( - 'UploadTesttestUpload', + 'UploadTest-testUpload', $file->Name, 'File is uploaded without extension' ); @@ -307,7 +307,7 @@ class UploadTest extends SapphireTest { $u->load($tmpFile); $file2 = $u->getFile(); $this->assertEquals( - 'UploadTesttestUpload_2', + 'UploadTest-testUpload-2', $file2->Name, 'File receives a number attached to the end' );