diff --git a/dev/BulkLoader.php b/dev/BulkLoader.php
index 53651cc34..98765e813 100644
--- a/dev/BulkLoader.php
+++ b/dev/BulkLoader.php
@@ -408,4 +408,15 @@ class BulkLoader_Result extends Object {
 		return $set;
 	}
+
+	/**
+	 * Merges another BulkLoader_Result into this one.
+	 *
+	 * @param BulkLoader_Result $other
+	 */
+	public function merge(BulkLoader_Result $other) {
+		$this->created = array_merge($this->created, $other->created);
+		$this->updated = array_merge($this->updated, $other->updated);
+		$this->deleted = array_merge($this->deleted, $other->deleted);
+	}
 
 }
diff --git a/dev/CsvBulkLoader.php b/dev/CsvBulkLoader.php
index 4d6fea561..5ec535608 100644
--- a/dev/CsvBulkLoader.php
+++ b/dev/CsvBulkLoader.php
@@ -37,6 +37,15 @@ class CsvBulkLoader extends BulkLoader {
 	 */
 	public $hasHeaderRow = true;
 
+	/**
+	 * Number of lines to split large CSV files into.
+	 *
+	 * @var int
+	 *
+	 * @config
+	 */
+	private static $lines = 1000;
+
 	/**
 	 * @inheritDoc
 	 */
@@ -47,8 +56,136 @@ class CsvBulkLoader extends BulkLoader {
 	/**
 	 * @param string $filepath
 	 * @param boolean $preview
+	 *
+	 * @return null|BulkLoader_Result Merged result of all chunks, null if no chunk could be processed
 	 */
 	protected function processAll($filepath, $preview = false) {
+		$files = $this->splitFile($filepath);
+
+		$result = null;
+		$last = null;
+
+		try {
+			foreach ($files as $file) {
+				$last = $file;
+
+				// forward the caller's preview flag instead of always importing for real
+				$next = $this->processChunk($file, $preview);
+
+				if ($result instanceof BulkLoader_Result) {
+					$result->merge($next);
+				} else {
+					$result = $next;
+				}
+			}
+		} catch (Exception $e) {
+			print "Failed to parse {$last}\n";
+		}
+
+		// remove every temporary chunk, including the ones never reached after a failure
+		foreach ($files as $file) {
+			@unlink($file);
+		}
+
+		return $result;
+	}
+
+	/**
+	 * Splits a large file up into many smaller files.
+	 *
+	 * Each chunk is written to a new temporary file and, when
+	 * {@link $hasHeaderRow} is set, starts with a copy of the header row.
+	 *
+	 * @param string $path Path to large file to split
+	 * @param int $lines Number of lines per file, defaults to the "lines" config value
+	 *
+	 * @return array List of file paths
+	 *
+	 * @throws InvalidArgumentException If $path cannot be opened for reading
+	 */
+	protected function splitFile($path, $lines = null) {
+		$previous = ini_get('auto_detect_line_endings');
+
+		ini_set('auto_detect_line_endings', true);
+
+		if (!is_int($lines)) {
+			$lines = $this->config()->get("lines");
+		}
+
+		$from = fopen($path, 'r');
+
+		if ($from === false) {
+			ini_set('auto_detect_line_endings', $previous);
+
+			// looping on feof() with an invalid handle would never terminate
+			throw new InvalidArgumentException("Unable to open {$path} for reading");
+		}
+
+		$new = $this->getNewSplitFileName();
+
+		$to = fopen($new, 'w+');
+
+		$header = null;
+
+		if ($this->hasHeaderRow) {
+			$header = fgets($from);
+			fwrite($to, $header);
+		}
+
+		$files = array();
+		$files[] = $new;
+
+		$count = 0;
+
+		// fgets() returns false at end of file, so a failed read is never written or counted
+		while (($line = fgets($from)) !== false) {
+			if ($count >= $lines) {
+				fclose($to);
+
+				// get a new temporary file name, to write the next lines to
+				$new = $this->getNewSplitFileName();
+
+				$to = fopen($new, 'w+');
+
+				if ($this->hasHeaderRow) {
+					// add the headers to the new file
+					fwrite($to, $header);
+				}
+
+				$files[] = $new;
+
+				$count = 0;
+			}
+
+			fwrite($to, $line);
+
+			$count++;
+		}
+
+		fclose($to);
+		fclose($from);
+
+		ini_set('auto_detect_line_endings', $previous);
+
+		return $files;
+	}
+
+	/**
+	 * Builds a unique ".csv" path inside TEMP_FOLDER for the next chunk file.
+	 *
+	 * @return string
+	 */
+	protected function getNewSplitFileName() {
+		return TEMP_FOLDER . '/' . uniqid('BulkLoader', true) . '.csv';
+	}
+
+	/**
+	 * @param string $filepath
+	 * @param boolean $preview
+	 *
+	 * @return BulkLoader_Result
+	 */
+	protected function processChunk($filepath, $preview = false) {
 		$results = new BulkLoader_Result();
 
 		$csv = new CSVParser(
diff --git a/tests/dev/CsvBulkLoaderTest.php b/tests/dev/CsvBulkLoaderTest.php
index 366c3bc83..7b1d15a5b 100644
--- a/tests/dev/CsvBulkLoaderTest.php
+++ b/tests/dev/CsvBulkLoaderTest.php
@@ -229,6 +229,17 @@ class CsvBulkLoaderTest extends SapphireTest {
 		return $i;
 	}
 
+	public function testLargeFileSplitIntoSmallerFiles() {
+		Config::inst()->update('CsvBulkLoader', 'lines', 3);
+
+		$loader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
+		$path = $this->getCurrentAbsolutePath() . '/CsvBulkLoaderTest_LargeListOfPlayers.csv';
+
+		$results = $loader->load($path);
+
+		$this->assertEquals(10, $results->Count());
+	}
+
 }
 
 class CsvBulkLoaderTest_CustomLoader extends CsvBulkLoader implements TestOnly {
diff --git a/tests/dev/CsvBulkLoaderTest_LargeListOfPlayers.csv b/tests/dev/CsvBulkLoaderTest_LargeListOfPlayers.csv
new file mode 100644
index 000000000..c0121ac50
--- /dev/null
+++ b/tests/dev/CsvBulkLoaderTest_LargeListOfPlayers.csv
@@ -0,0 +1,11 @@
+"Name","Biography","Birthday","IsRegistered"
+"Name0","Biography0","Birthday0","1"
+"Name1","Biography1","Birthday1","1"
+"Name2","Biography2","Birthday2","1"
+"Name3","Biography3","Birthday3","1"
+"Name4","Biography4","Birthday4","1"
+"Name5","Biography5","Birthday5","1"
+"Name6","Biography6","Birthday6","1"
+"Name7","Biography7","Birthday7","1"
+"Name8","Biography8","Birthday8","1"
+"Name9","Biography9","Birthday9","1"
\ No newline at end of file