From ced2ba1f64015c92dfeea6b473cf3f1999e449c6 Mon Sep 17 00:00:00 2001 From: Daniel Hensby Date: Wed, 21 Feb 2018 20:22:37 +0000 Subject: [PATCH] API Move CSV writing/reading to league/csv library --- composer.json | 1 + src/Dev/BulkLoader_Result.php | 2 +- src/Dev/CSVParser.php | 2 + src/Dev/CsvBulkLoader.php | 90 ++++++++++++++----- src/Forms/GridField/GridFieldExportButton.php | 64 ++++++++----- tests/php/Dev/CsvBulkLoaderTest.php | 50 ++++++++--- .../CsvBulkLoaderTest/csv/PlayersWithTabs.csv | 6 ++ .../GridField/GridFieldExportButtonTest.php | 59 ++++++++---- 8 files changed, 203 insertions(+), 71 deletions(-) create mode 100644 tests/php/Dev/CsvBulkLoaderTest/csv/PlayersWithTabs.csv diff --git a/composer.json b/composer.json index 921f3162a..7fd5c8548 100644 --- a/composer.json +++ b/composer.json @@ -24,6 +24,7 @@ "require": { "composer/installers": "~1.0", "embed/embed": "^3.0", + "league/csv": "^8", "league/flysystem": "~1.0.12", "monolog/monolog": "~1.11", "nikic/php-parser": "^2 || ^3", diff --git a/src/Dev/BulkLoader_Result.php b/src/Dev/BulkLoader_Result.php index 957154105..d4be69759 100644 --- a/src/Dev/BulkLoader_Result.php +++ b/src/Dev/BulkLoader_Result.php @@ -15,7 +15,7 @@ use SilverStripe\View\ArrayData; * * @author Ingo Schommer, Silverstripe Ltd. (@silverstripe.com) */ -class BulkLoader_Result +class BulkLoader_Result implements \Countable { use Injectable; diff --git a/src/Dev/CSVParser.php b/src/Dev/CSVParser.php index 6c26ae1d5..358b2df20 100644 --- a/src/Dev/CSVParser.php +++ b/src/Dev/CSVParser.php @@ -2,6 +2,7 @@ namespace SilverStripe\Dev; +use League\Csv\Reader; use SilverStripe\Core\Injector\Injectable; use Iterator; @@ -114,6 +115,7 @@ class CSVParser implements Iterator */ public function __construct($filename, $delimiter = ",", $enclosure = '"') { + Deprecation::notice('5.0', __CLASS__ . ' is deprecated, use ' . Reader::class . ' instead'); $filename = Director::getAbsFile($filename); $this->filename = $filename; $this->delimiter = $delimiter; diff --git a/src/Dev/CsvBulkLoader.php b/src/Dev/CsvBulkLoader.php index 4d4fc6f0a..10daa3015 100644 --- a/src/Dev/CsvBulkLoader.php +++ b/src/Dev/CsvBulkLoader.php @@ -2,9 +2,9 @@ namespace SilverStripe\Dev; +use League\Csv\Reader; use SilverStripe\Control\Director; use SilverStripe\ORM\DataObject; -use Exception; /** * Utility class to facilitate complex CSV-imports by defining column-mappings @@ -67,37 +67,85 @@ class CsvBulkLoader extends BulkLoader */ protected function processAll($filepath, $preview = false) { - $filepath = Director::getAbsFile($filepath); - $files = $this->splitFile($filepath); - - $result = null; - $last = null; + $previousDetectLE = ini_get('auto_detect_line_endings'); + ini_set('auto_detect_line_endings', true); try { - foreach ($files as $file) { - $last = $file; + $filepath = Director::getAbsFile($filepath); + $csvReader = Reader::createFromPath($filepath, 'r'); - $next = $this->processChunk($file, $preview); - - if ($result instanceof BulkLoader_Result) { - $result->merge($next); - } else { - $result = $next; + $tabExtractor = function ($row, $rowOffset, $iterator) { + foreach ($row as &$item) { + // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import + if (preg_match("/^\t[\-@=\+]+.*/", $item)) { + $item = ltrim($item, "\t"); + } } + return $row; + }; - @unlink($file); + if ($this->columnMap) { + $headerMap = $this->getNormalisedColumnMap(); + $remapper = function ($row, $rowOffset, $iterator) use ($headerMap, $tabExtractor) { + $row = $tabExtractor($row, $rowOffset, $iterator); + foreach ($headerMap as $column => $renamedColumn) { + if ($column == $renamedColumn) { + continue; + } + if (array_key_exists($column, $row)) { + if (strpos($renamedColumn, '_ignore_') !== 0) { + $row[$renamedColumn] = $row[$column]; + } + unset($row[$column]); + } + } + return $row; + }; + } else { + $remapper = $tabExtractor; } - } catch (Exception $e) { - $failedMessage = sprintf("Failed to parse %s", $last); + + if ($this->hasHeaderRow) { + $rows = $csvReader->fetchAssoc(0, $remapper); + } elseif ($this->columnMap) { + $rows = $csvReader->fetchAssoc($headerMap, $remapper); + } + + $result = BulkLoader_Result::create(); + + foreach ($rows as $row) { + $this->processRecord($row, $this->columnMap, $result, $preview); + } + } catch (\Exception $e) { + $failedMessage = sprintf("Failed to parse %s", $filepath); if (Director::isDev()) { $failedMessage = sprintf($failedMessage . " because %s", $e->getMessage()); } print $failedMessage . PHP_EOL; + } finally { + ini_set('auto_detect_line_endings', $previousDetectLE); } - return $result; } + protected function getNormalisedColumnMap() + { + $map = []; + foreach ($this->columnMap as $column => $newColumn) { + if (strpos($newColumn, "->") === 0) { + $map[$column] = $column; + } elseif (is_null($newColumn)) { + // the column map must consist of unique scalar values + // `null` can be present multiple times and is not scalar + // so we name it in a standard way so we can remove it later + $map[$column] = '_ignore_' . $column; + } else { + $map[$column] = $newColumn; + } + } + return $map; + } + /** * Splits a large file up into many smaller files. * @@ -108,6 +156,7 @@ class CsvBulkLoader extends BulkLoader */ protected function splitFile($path, $lines = null) { + Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream'); $previous = ini_get('auto_detect_line_endings'); ini_set('auto_detect_line_endings', true); @@ -169,6 +218,7 @@ class CsvBulkLoader extends BulkLoader */ protected function getNewSplitFileName() { + Deprecation::notice('5.0', 'getNewSplitFileName is deprecated, please name your files yourself'); return TEMP_PATH . DIRECTORY_SEPARATOR . uniqid(str_replace('\\', '_', static::class), true) . '.csv'; } @@ -180,6 +230,7 @@ class CsvBulkLoader extends BulkLoader */ protected function processChunk($filepath, $preview = false) { + Deprecation::notice('5.0', 'processChunk is deprecated, please process rows individually'); $results = BulkLoader_Result::create(); $csv = new CSVParser( @@ -331,8 +382,7 @@ class CsvBulkLoader extends BulkLoader $obj->destroy(); // memory usage - unset($existingObj); - unset($obj); + unset($existingObj, $obj); return $objID; } diff --git a/src/Forms/GridField/GridFieldExportButton.php b/src/Forms/GridField/GridFieldExportButton.php index e676cc020..4c4d41eac 100644 --- a/src/Forms/GridField/GridFieldExportButton.php +++ b/src/Forms/GridField/GridFieldExportButton.php @@ -2,6 +2,7 @@ namespace SilverStripe\Forms\GridField; +use League\Csv\Writer; use SilverStripe\Control\HTTPRequest; use SilverStripe\Control\HTTPResponse; use SilverStripe\Core\Config\Config; @@ -61,6 +62,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP * Place the export button in a

tag below the field * * @param GridField $gridField + * * @return array */ public function getHTMLFragments($gridField) @@ -74,20 +76,21 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP ); $button->addExtraClass('btn btn-secondary no-ajax font-icon-down-circled action_export'); $button->setForm($gridField->getForm()); - return array( - $this->targetFragment => $button->Field() - ); + return [ + $this->targetFragment => $button->Field(), + ]; } /** * export is an action button * * @param GridField $gridField + * * @return array */ public function getActions($gridField) { - return array('export'); + return ['export']; } public function handleAction(GridField $gridField, $actionName, $arguments, $data) @@ -102,13 +105,14 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP * it is also a URL * * @param GridField $gridField + * * @return array */ public function getURLHandlers($gridField) { - return array( + return [ 'export' => 'handleExport', - ); + ]; } /** @@ -116,6 +120,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP * * @param GridField $gridField * @param HTTPRequest $request + * * @return HTTPResponse */ public function handleExport($gridField, $request = null) @@ -155,15 +160,33 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP * Generate export fields for CSV. * * @param GridField $gridField + * * @return string */ public function generateExportFileData($gridField) { $csvColumns = $this->getExportColumnsForGridField($gridField); - $fileData = array(); + + $csvWriter = Writer::createFromFileObject(new \SplTempFileObject()); + $csvWriter->setDelimiter($this->getCsvSeparator()); + $csvWriter->setEnclosure($this->getCsvEnclosure()); + $csvWriter->setNewline("\r\n"); //use windows line endings for compatibility with some csv libraries + $csvWriter->setOutputBOM(Writer::BOM_UTF8); + + if (!Config::inst()->get(get_class($this), 'xls_export_disabled')) { + $csvWriter->addFormatter(function (array $row) { + foreach ($row as &$item) { + // [SS-2017-007] Sanitise XLS executable column values with a leading tab + if (preg_match('/^[-@=+].*/', $item)) { + $item = "\t" . $item; + } + } + return $row; + }); + } if ($this->csvHasHeader) { - $headers = array(); + $headers = []; // determine the CSV headers. If a field is callable (e.g. anonymous function) then use the // source name as the header instead @@ -175,7 +198,8 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP } } - $fileData[] = $headers; + $csvWriter->insertOne($headers); + unset($headers); } //Remove GridFieldPaginator as we're going to export the entire list. @@ -193,7 +217,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP /** @var DataObject $item */ foreach ($items->limit(null) as $item) { if (!$item->hasMethod('canView') || $item->canView()) { - $columnData = array(); + $columnData = []; foreach ($csvColumns as $columnSource => $columnHeader) { if (!is_string($columnHeader) && is_callable($columnHeader)) { @@ -212,16 +236,10 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP } } - // [SS-2017-007] Sanitise XLS executable column values with a leading tab - if (!Config::inst()->get(get_class($this), 'xls_export_disabled') - && preg_match('/^[-@=+].*/', $value) - ) { - $value = "\t" . $value; - } $columnData[] = $value; } - $fileData[] = $columnData; + $csvWriter->insertOne($columnData); } if ($item->hasMethod('destroy')) { @@ -229,13 +247,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP } } - // Convert the $fileData array into csv by capturing fputcsv's output - $csv = fopen('php://temp', 'r+'); - foreach ($fileData as $line) { - fputcsv($csv, $line, $this->getCsvSeparator(), $this->getCsvEnclosure()); - } - rewind($csv); - return stream_get_contents($csv); + return (string)$csvWriter; } /** @@ -248,6 +260,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP /** * @param array $cols + * * @return $this */ public function setExportColumns($cols) @@ -266,6 +279,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP /** * @param string $separator + * * @return $this */ public function setCsvSeparator($separator) @@ -284,6 +298,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP /** * @param string $enclosure + * * @return $this */ public function setCsvEnclosure($enclosure) @@ -302,6 +317,7 @@ class GridFieldExportButton implements GridField_HTMLProvider, GridField_ActionP /** * @param boolean $bool + * * @return $this */ public function setCsvHasHeader($bool) diff --git a/tests/php/Dev/CsvBulkLoaderTest.php b/tests/php/Dev/CsvBulkLoaderTest.php index beb0f54e7..8885e58a9 100644 --- a/tests/php/Dev/CsvBulkLoaderTest.php +++ b/tests/php/Dev/CsvBulkLoaderTest.php @@ -50,7 +50,7 @@ class CsvBulkLoaderTest extends SapphireTest $results = $loader->load($filepath); // Test that right amount of columns was imported - $this->assertEquals(5, $results->Count(), 'Test correct count of imported data'); + $this->assertCount(5, $results, 'Test correct count of imported data'); // Test that columns were correctly imported $obj = DataObject::get_one( @@ -76,20 +76,50 @@ class CsvBulkLoaderTest extends SapphireTest $filepath = $this->csvPath . 'PlayersWithHeader.csv'; $loader->deleteExistingRecords = true; $results1 = $loader->load($filepath); - $this->assertEquals(5, $results1->Count(), 'Test correct count of imported data on first load'); + $this->assertCount(5, $results1, 'Test correct count of imported data on first load'); //delete existing data before doing second CSV import $results2 = $loader->load($filepath); //get all instances of the loaded DataObject from the database and count them $resultDataObject = DataObject::get(Player::class); - $this->assertEquals( + $this->assertCount( 5, - $resultDataObject->count(), + $resultDataObject, 'Test if existing data is deleted before new data is added' ); } + public function testLeadingTabs() + { + $loader = new CsvBulkLoader(Player::class); + $loader->hasHeaderRow = false; + $loader->columnMap = array( + 'FirstName', + 'Biography', + null, // ignored column + 'Birthday', + 'IsRegistered' + ); + $filepath = $this->csvPath . 'PlayersWithTabs.csv'; + $results = $loader->load($filepath); + $this->assertCount(5, $results); + + $expectedBios = [ + "\tHe's a good guy", + "=She is awesome.\nSo awesome that she gets multiple rows and \"escaped\" strings in her biography", + "-Pretty old\, with an escaped comma", + "@Unicode FTW", + "+Unicode FTW", + ]; + + foreach (Player::get()->column('Biography') as $bio) { + $this->assertContains($bio, $expectedBios); + } + + $this->assertEquals(Player::get()->count(), count($expectedBios)); + } + /** * Test import with manual column mapping */ @@ -111,7 +141,7 @@ class CsvBulkLoaderTest extends SapphireTest $results = $loader->load($filepath); // Test that right amount of columns was imported - $this->assertEquals(4, $results->Count(), 'Test correct count of imported data'); + $this->assertCount(4, $results, 'Test correct count of imported data'); // Test that columns were correctly imported $obj = DataObject::get_one( @@ -167,7 +197,7 @@ class CsvBulkLoaderTest extends SapphireTest $results = $loader->load($filepath); // Test that right amount of columns was imported - $this->assertEquals(1, $results->Count(), 'Test correct count of imported data'); + $this->assertCount(1, $results, 'Test correct count of imported data'); // Test of augumenting existing relation (created by fixture) $testTeam = DataObject::get_one(Team::class, null, null, '"Created" DESC'); @@ -246,9 +276,9 @@ class CsvBulkLoaderTest extends SapphireTest $results = $loader->load($filepath); $createdPlayers = $results->Created(); $player = $createdPlayers->first(); - $this->assertEquals($player->FirstName, 'Customized John'); - $this->assertEquals($player->Biography, "He's a good guy"); - $this->assertEquals($player->IsRegistered, "1"); + $this->assertEquals('Customized John', $player->FirstName); + $this->assertEquals("He's a good guy", $player->Biography); + $this->assertEquals("1", $player->IsRegistered); } public function testLoadWithCustomImportMethodDuplicateMap() @@ -290,6 +320,6 @@ class CsvBulkLoaderTest extends SapphireTest $results = $loader->load($path); - $this->assertEquals(10, $results->Count()); + $this->assertCount(10, $results); } } diff --git a/tests/php/Dev/CsvBulkLoaderTest/csv/PlayersWithTabs.csv b/tests/php/Dev/CsvBulkLoaderTest/csv/PlayersWithTabs.csv new file mode 100644 index 000000000..1f1a5207a --- /dev/null +++ b/tests/php/Dev/CsvBulkLoaderTest/csv/PlayersWithTabs.csv @@ -0,0 +1,6 @@ +"John"," He's a good guy","ignored","1988-01-31","1" +"Jane"," =She is awesome. +So awesome that she gets multiple rows and ""escaped"" strings in her biography","ignored","1982-01-31","0" +"Jamie"," -Pretty old\, with an escaped comma","ignored","1882-01-31","1" +"Järg"," @Unicode FTW","ignored","1982-06-30","1" +"Järg"," +Unicode FTW","ignored","1982-06-30","1" diff --git a/tests/php/Forms/GridField/GridFieldExportButtonTest.php b/tests/php/Forms/GridField/GridFieldExportButtonTest.php index c9c1bbbeb..5738beb98 100644 --- a/tests/php/Forms/GridField/GridFieldExportButtonTest.php +++ b/tests/php/Forms/GridField/GridFieldExportButtonTest.php @@ -2,6 +2,7 @@ namespace SilverStripe\Forms\Tests\GridField; +use League\Csv\Reader; use SilverStripe\Forms\Tests\GridField\GridFieldExportButtonTest\NoView; use SilverStripe\Forms\Tests\GridField\GridFieldExportButtonTest\Team; use SilverStripe\ORM\DataList; @@ -54,9 +55,12 @@ class GridFieldExportButtonTest extends SapphireTest $config = GridFieldConfig::create()->addComponent(new GridFieldExportButton()); $gridField = new GridField('testfield', 'testfield', $list, $config); + $csvReader = Reader::createFromString($button->generateExportFileData($gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - "\"My Name\"\n", - $button->generateExportFileData($gridField) + "$bom\"My Name\"\r\n", + (string) $csvReader ); } @@ -65,9 +69,12 @@ class GridFieldExportButtonTest extends SapphireTest $button = new GridFieldExportButton(); $button->setExportColumns(['Name' => 'My Name']); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - '"My Name"' . "\n" . 'Test' . "\n" . 'Test2' . "\n", - $button->generateExportFileData($this->gridField) + $bom . '"My Name"' . "\r\n" . 'Test' . "\r\n" . 'Test2' . "\r\n", + (string) $csvReader ); } @@ -82,9 +89,12 @@ class GridFieldExportButtonTest extends SapphireTest $button = new GridFieldExportButton(); $button->setExportColumns(['Name' => 'My Name']); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - "\"My Name\"\n\"\t=SUM(1, 2)\"\nTest\nTest2\n", - $button->generateExportFileData($this->gridField) + "$bom\"My Name\"\r\n\"\t=SUM(1, 2)\"\r\nTest\r\nTest2\r\n", + (string) $csvReader ); } @@ -98,9 +108,12 @@ class GridFieldExportButtonTest extends SapphireTest } ]); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - 'Name,City' . "\n" . 'Test,"City city"' . "\n" . 'Test2,"Quoted ""City"" 2 city"' . "\n", - $button->generateExportFileData($this->gridField) + $bom . 'Name,City' . "\r\n" . 'Test,"City city"' . "\r\n" . 'Test2,"Quoted ""City"" 2 city"' . "\r\n", + (string) $csvReader ); } @@ -112,9 +125,12 @@ class GridFieldExportButtonTest extends SapphireTest 'City' => 'strtolower', ]); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - 'Name,strtolower' . "\n" . 'Test,City' . "\n" . 'Test2,"Quoted ""City"" 2"' . "\n", - $button->generateExportFileData($this->gridField) + $bom . 'Name,strtolower' . "\r\n" . 'Test,City' . "\r\n" . 'Test2,"Quoted ""City"" 2"' . "\r\n", + (string) $csvReader ); } @@ -127,9 +143,12 @@ class GridFieldExportButtonTest extends SapphireTest ]); $button->setCsvHasHeader(false); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - 'Test,City' . "\n" . 'Test2,"Quoted ""City"" 2"' . "\n", - $button->generateExportFileData($this->gridField) + $bom . 'Test,City' . "\r\n" . 'Test2,"Quoted ""City"" 2"' . "\r\n", + (string) $csvReader ); } @@ -146,9 +165,14 @@ class GridFieldExportButtonTest extends SapphireTest } $this->gridField->setList($arrayList); + $exportData = $button->generateExportFileData($this->gridField); + + $csvReader = Reader::createFromString($exportData); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - "ID\n" . "1\n" . "2\n" . "3\n" . "4\n" . "5\n" . "6\n" . "7\n" . "8\n" . "9\n" . "10\n" . "11\n" . "12\n" . "13\n" . "14\n" . "15\n" . "16\n", - $button->generateExportFileData($this->gridField) + $bom . "ID\r\n" . "1\r\n" . "2\r\n" . "3\r\n" . "4\r\n" . "5\r\n" . "6\r\n" . "7\r\n" . "8\r\n" . "9\r\n" . "10\r\n" . "11\r\n" . "12\r\n" . "13\r\n" . "14\r\n" . "15\r\n" . "16\r\n", + (string) $csvReader ); } @@ -159,9 +183,12 @@ class GridFieldExportButtonTest extends SapphireTest 'RugbyTeamNumber' => 'Rugby Team Number' ]); + $csvReader = Reader::createFromString($button->generateExportFileData($this->gridField)); + $bom = $csvReader->getInputBOM(); + $this->assertEquals( - "\"Rugby Team Number\"\n2\n0\n", - $button->generateExportFileData($this->gridField) + "$bom\"Rugby Team Number\"\r\n2\r\n0\r\n", + (string) $csvReader ); } }