Merge pull request #4563 from assertchris/split-bulk-loader-imports-to-reduce-memory-consumption

Splitting BulkLoader imports to reduce memory consumption
This commit is contained in:
Damian Mooyman 2015-09-15 10:22:45 +12:00
commit 8c99659e3f
4 changed files with 149 additions and 0 deletions

View File

@ -408,4 +408,15 @@ class BulkLoader_Result extends Object {
return $set; return $set;
} }
/**
* Merges the created, updated and deleted entries of another BulkLoader_Result into this one.
*
* @param BulkLoader_Result $other
*/
public function merge(BulkLoader_Result $other) {
	// Append each of the other result's change lists onto the end of the
	// matching list held by this result.
	foreach (array('created', 'updated', 'deleted') as $bucket) {
		$this->$bucket = array_merge($this->$bucket, $other->$bucket);
	}
}
} }

View File

@ -37,6 +37,15 @@ class CsvBulkLoader extends BulkLoader {
*/ */
public $hasHeaderRow = true; public $hasHeaderRow = true;
/**
 * Maximum number of lines written to each chunk file when a large CSV
 * file is split up for processing. The header row, if present, is copied
 * into every chunk and is not counted towards this limit.
 *
 * @var int
 *
 * @config
 */
private static $lines = 1000;
/** /**
* @inheritDoc * @inheritDoc
*/ */
@ -47,8 +56,115 @@ class CsvBulkLoader extends BulkLoader {
/** /**
* @param string $filepath * @param string $filepath
* @param boolean $preview * @param boolean $preview
*
* @return null|BulkLoader_Result Merged result of the processed chunks, or null if no chunk was processed successfully
*/ */
protected function processAll($filepath, $preview = false) {
	// The source file is split into smaller temporary files which are then
	// processed one at a time, so only one chunk is loaded at once.
	$files = $this->splitFile($filepath);

	$result = null;
	$last = null;

	try {
		foreach ($files as $file) {
			$last = $file;

			// Forward the caller's $preview flag instead of a hard-coded
			// false, so a preview request is not silently ignored.
			$next = $this->processChunk($file, $preview);

			if ($result instanceof BulkLoader_Result) {
				$result->merge($next);
			} else {
				$result = $next;
			}

			// Free the disk space as soon as the chunk has been handled.
			@unlink($file);
		}
	} catch (Exception $e) {
		// Best effort: report the failing chunk and return whatever was
		// gathered from the chunks processed before it.
		print "Failed to parse {$last}\n";
	}

	// When a chunk fails, it and every chunk after it are still on disk;
	// remove them so failed imports do not accumulate temporary files.
	foreach ($files as $file) {
		if (is_file($file)) {
			@unlink($file);
		}
	}

	return $result;
}
/**
* Splits a large file up into many smaller files.
*
* @param string $path Path to large file to split
* @param int $lines Number of lines per file
*
* @return array List of file paths
*/
protected function splitFile($path, $lines = null) {
	// Splits $path into temporary files of at most $lines lines each and
	// returns their paths. At least one file is always returned, even for
	// empty input. Throws RuntimeException if the source file or a chunk
	// file cannot be opened.
	//
	// NOTE(review): splitting is done per physical line, so a quoted CSV
	// field containing a line break could be cut across two chunks.

	// Let fgets() recognise all line-ending styles while splitting; the
	// previous setting is restored before this method returns or throws.
	$previous = ini_get('auto_detect_line_endings');
	ini_set('auto_detect_line_endings', true);

	if (!is_int($lines)) {
		$lines = $this->config()->get("lines");
	}

	$files = array();
	$error = null;
	$to = null;

	$from = fopen($path, 'r');

	if ($from === false) {
		$error = "Unable to open {$path} for reading";
	} else {
		// The header (if any) is read once and repeated in every chunk.
		$header = $this->hasHeaderRow ? fgets($from) : false;
		$count = 0;

		// Read one line ahead, so a new chunk is only started when there
		// is something to put in it and the false returned by fgets() at
		// end of file is never written or counted.
		$line = fgets($from);

		do {
			if ($to === null) {
				// get a new temporary file name, to write the next lines to
				$new = $this->getNewSplitFileName();
				$to = fopen($new, 'w+');

				if ($to === false) {
					$error = "Unable to open {$new} for writing";
					break;
				}

				$files[] = $new;

				if ($header !== false) {
					// add the headers to the new file
					fwrite($to, $header);
				}
			}

			if ($line !== false) {
				fwrite($to, $line);
				$count++;
				$line = fgets($from);
			}

			// Rotate only when the chunk is full AND more input remains,
			// which avoids a trailing chunk holding nothing but the header.
			if ($count >= $lines && $line !== false) {
				fclose($to);
				$to = null;
				$count = 0;
			}
		} while ($line !== false);

		if (is_resource($to)) {
			fclose($to);
		}

		fclose($from);
	}

	ini_set('auto_detect_line_endings', $previous);

	if ($error !== null) {
		// Do not leave partially written chunks behind on failure.
		foreach ($files as $file) {
			if (is_file($file)) {
				@unlink($file);
			}
		}

		throw new RuntimeException($error);
	}

	return $files;
}
/**
* @return string
*/
protected function getNewSplitFileName() {
	// Build a fresh, unique .csv path inside the temp folder for one chunk.
	$unique = uniqid('BulkLoader', true);

	return TEMP_FOLDER . '/' . $unique . '.csv';
}
/**
* @param string $filepath
* @param boolean $preview
*
* @return BulkLoader_Result
*/
protected function processChunk($filepath, $preview = false) {
$results = new BulkLoader_Result(); $results = new BulkLoader_Result();
$csv = new CSVParser( $csv = new CSVParser(

View File

@ -229,6 +229,17 @@ class CsvBulkLoaderTest extends SapphireTest {
return $i; return $i;
} }
public function testLargeFileSplitIntoSmallerFiles() {
	// Use a chunk size of three lines so the ten-row fixture has to be
	// split across several chunk files.
	Config::inst()->update('CsvBulkLoader', 'lines', 3);

	$csvPath = $this->getCurrentAbsolutePath() . '/CsvBulkLoaderTest_LargeListOfPlayers.csv';
	$bulkLoader = new CsvBulkLoader('CsvBulkLoaderTest_Player');

	$outcome = $bulkLoader->load($csvPath);

	// Every row of the fixture must be accounted for in the merged result.
	$this->assertEquals(10, $outcome->Count());
}
} }
class CsvBulkLoaderTest_CustomLoader extends CsvBulkLoader implements TestOnly { class CsvBulkLoaderTest_CustomLoader extends CsvBulkLoader implements TestOnly {

View File

@ -0,0 +1,11 @@
"Name","Biography","Birthday","IsRegistered"
"Name0","Biography0","Birthday0","1"
"Name1","Biography1","Birthday1","1"
"Name2","Biography2","Birthday2","1"
"Name3","Biography3","Birthday3","1"
"Name4","Biography4","Birthday4","1"
"Name5","Biography5","Birthday5","1"
"Name6","Biography6","Birthday6","1"
"Name7","Biography7","Birthday7","1"
"Name8","Biography8","Birthday8","1"
"Name9","Biography9","Birthday9","1"
1 Name Biography Birthday IsRegistered
2 Name0 Biography0 Birthday0 1
3 Name1 Biography1 Birthday1 1
4 Name2 Biography2 Birthday2 1
5 Name3 Biography3 Birthday3 1
6 Name4 Biography4 Birthday4 1
7 Name5 Biography5 Birthday5 1
8 Name6 Biography6 Birthday6 1
9 Name7 Biography7 Birthday7 1
10 Name8 Biography8 Birthday8 1
11 Name9 Biography9 Birthday9 1