mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #4563 from assertchris/split-bulk-loader-imports-to-reduce-memory-consumption
Splitting BulkLoader imports to reduce memory consumption
This commit is contained in:
commit
8c99659e3f
@ -408,4 +408,15 @@ class BulkLoader_Result extends Object {
|
||||
|
||||
return $set;
|
||||
}
|
||||
|
||||
/**
 * Absorbs the entries tracked by another result into this one.
 *
 * The created, updated and deleted lists of $other are each combined with
 * the matching list of this result via array_merge(). $other is only read.
 *
 * @param BulkLoader_Result $other Result whose entries are added to this one
 */
public function merge(BulkLoader_Result $other) {
    // The three lists are handled identically, so walk them by name.
    foreach (array('created', 'updated', 'deleted') as $list) {
        $this->$list = array_merge($this->$list, $other->$list);
    }
}
|
||||
}
|
||||
|
@ -37,6 +37,15 @@ class CsvBulkLoader extends BulkLoader {
|
||||
*/
|
||||
public $hasHeaderRow = true;
|
||||
|
||||
/**
|
||||
 * Number of lines written to each chunk file when a large CSV file is split.
|
||||
*
|
||||
* @var int
|
||||
*
|
||||
* @config
|
||||
*/
|
||||
private static $lines = 1000;
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@ -47,8 +56,115 @@ class CsvBulkLoader extends BulkLoader {
|
||||
/**
 * Imports a CSV file by splitting it into smaller chunk files and
 * processing those one after another, merging the per-chunk results.
 *
 * If processing a chunk throws, a message naming that chunk is printed,
 * the remaining chunks are skipped and whatever was merged up to that
 * point is returned. All chunk files are removed before returning.
 *
 * @param string $filepath Path of the CSV file to import
 * @param boolean $preview Forwarded to processChunk() for every chunk
 *
 * @return null|BulkLoader_Result Merged result of the processed chunks,
 *                                or null when no chunk was processed
 *                                successfully
 */
protected function processAll($filepath, $preview = false) {
    $files = $this->splitFile($filepath);

    $result = null;
    $last = null;

    try {
        foreach ($files as $file) {
            $last = $file;

            // Honour the caller's preview flag instead of forcing it off.
            $next = $this->processChunk($file, $preview);

            if ($result instanceof BulkLoader_Result) {
                $result->merge($next);
            } else {
                $result = $next;
            }

            // Best effort: the chunk is no longer needed once processed.
            @unlink($file);
        }
    } catch (Exception $e) {
        print "Failed to parse {$last}\n";
    }

    // After a failure the failing chunk and all later ones are still on
    // disk; sweep whatever is left so temporary files do not accumulate.
    foreach ($files as $file) {
        if (file_exists($file)) {
            @unlink($file);
        }
    }

    return $result;
}
|
||||
|
||||
/**
 * Splits a large file up into many smaller files.
 *
 * The source is copied line by line into temporary files named by
 * getNewSplitFileName(). When $hasHeaderRow is set, the first line of the
 * source is repeated at the top of every chunk. A new chunk is only
 * started once there is another line to put into it, so no header-only
 * chunk is left at the end.
 *
 * NOTE(review): chunks are cut on physical lines, so a quoted CSV field
 * that contains a line break could end up divided between two files -
 * confirm whether such input has to be supported.
 *
 * @param string $path Path to large file to split
 * @param int $lines Number of lines per file; when this is not an integer
 *                   the "lines" config value is used instead
 *
 * @return array List of file paths (always at least one entry)
 *
 * @throws InvalidArgumentException If the source file cannot be opened
 * @throws RuntimeException If a chunk file cannot be created
 */
protected function splitFile($path, $lines = null) {
    if (!is_int($lines)) {
        $lines = $this->config()->get("lines");
    }

    $previous = ini_get('auto_detect_line_endings');

    ini_set('auto_detect_line_endings', true);

    $from = fopen($path, 'r');

    if ($from === false) {
        // Without this guard the read loop would operate on an invalid handle.
        ini_set('auto_detect_line_endings', $previous);

        throw new InvalidArgumentException("Unable to open {$path} for reading");
    }

    $header = '';

    if ($this->hasHeaderRow) {
        $header = fgets($from);

        if ($header === false) {
            // Empty source file: there is no header line to repeat.
            $header = '';
        }
    }

    $files = array();
    $to = null;

    try {
        list($new, $to) = $this->openSplitFile($header);
        $files[] = $new;

        $count = 0;

        // Looping on the fgets() result (rather than feof()) avoids writing
        // and counting the failed read that happens at the end of the file.
        while (($line = fgets($from)) !== false) {
            // "$count > 0" keeps one line per chunk for a non-positive limit.
            if ($count > 0 && $count >= $lines) {
                fclose($to);
                $to = null;

                // get a new temporary file, to write the next lines to
                list($new, $to) = $this->openSplitFile($header);
                $files[] = $new;

                $count = 0;
            }

            fwrite($to, $line);

            $count++;
        }
    } catch (Exception $e) {
        // Release handles, restore the ini setting and drop partial output
        // before letting the caller see the failure.
        if (is_resource($to)) {
            fclose($to);
        }

        fclose($from);

        ini_set('auto_detect_line_endings', $previous);

        foreach ($files as $file) {
            if (file_exists($file)) {
                @unlink($file);
            }
        }

        throw $e;
    }

    fclose($to);
    fclose($from);

    ini_set('auto_detect_line_endings', $previous);

    return $files;
}

/**
 * Creates the next chunk file and writes the header line to it when
 * $hasHeaderRow is set.
 *
 * @param string $header Header line to repeat at the top of the chunk
 *
 * @return array Two elements: the chunk's path and its open file handle
 *
 * @throws RuntimeException If the chunk file cannot be created
 */
private function openSplitFile($header) {
    $path = $this->getNewSplitFileName();
    $handle = fopen($path, 'w+');

    if ($handle === false) {
        throw new RuntimeException("Unable to create temporary file {$path}");
    }

    if ($this->hasHeaderRow) {
        // add the headers to the new file
        fwrite($handle, $header);
    }

    return array($path, $handle);
}
|
||||
|
||||
/**
 * Builds the path for a new chunk file inside TEMP_FOLDER.
 *
 * @return string Path made of TEMP_FOLDER, a uniqid() value prefixed with
 *                "BulkLoader", and the ".csv" extension
 */
protected function getNewSplitFileName() {
    // The second argument asks uniqid() for its longer, more-entropy form.
    $name = uniqid('BulkLoader', true);

    return TEMP_FOLDER . '/' . $name . '.csv';
}
|
||||
|
||||
/**
|
||||
* @param string $filepath
|
||||
* @param boolean $preview
|
||||
*
|
||||
* @return BulkLoader_Result
|
||||
*/
|
||||
protected function processChunk($filepath, $preview = false) {
|
||||
$results = new BulkLoader_Result();
|
||||
|
||||
$csv = new CSVParser(
|
||||
|
@ -229,6 +229,17 @@ class CsvBulkLoaderTest extends SapphireTest {
|
||||
return $i;
|
||||
}
|
||||
|
||||
/**
 * Loads the ten-row players fixture with the chunk size set to three
 * lines and checks that the result still reports all ten records.
 */
public function testLargeFileSplitIntoSmallerFiles() {
    // A chunk size of three makes the ten-row fixture span several files.
    Config::inst()->update('CsvBulkLoader', 'lines', 3);

    $bulkLoader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
    $fixture = $this->getCurrentAbsolutePath() . '/CsvBulkLoaderTest_LargeListOfPlayers.csv';

    $outcome = $bulkLoader->load($fixture);

    $this->assertEquals(10, $outcome->Count());
}
|
||||
|
||||
}
|
||||
|
||||
class CsvBulkLoaderTest_CustomLoader extends CsvBulkLoader implements TestOnly {
|
||||
|
11
tests/dev/CsvBulkLoaderTest_LargeListOfPlayers.csv
Normal file
11
tests/dev/CsvBulkLoaderTest_LargeListOfPlayers.csv
Normal file
@ -0,0 +1,11 @@
|
||||
"Name","Biography","Birthday","IsRegistered"
|
||||
"Name0","Biography0","Birthday0","1"
|
||||
"Name1","Biography1","Birthday1","1"
|
||||
"Name2","Biography2","Birthday2","1"
|
||||
"Name3","Biography3","Birthday3","1"
|
||||
"Name4","Biography4","Birthday4","1"
|
||||
"Name5","Biography5","Birthday5","1"
|
||||
"Name6","Biography6","Birthday6","1"
|
||||
"Name7","Biography7","Birthday7","1"
|
||||
"Name8","Biography8","Birthday8","1"
|
||||
"Name9","Biography9","Birthday9","1"
|
|
Loading…
Reference in New Issue
Block a user