silverstripe-framework/dev/CsvBulkLoader.php

369 lines
9.0 KiB
PHP
Raw Normal View History

<?php
/**
 * Utility class to facilitate complex CSV-imports by defining column-mappings
 * and custom converters.
 *
 * Uses the fgetcsv() function to process CSV input. Accepts a file-handler as
 * input.
 *
 * @see http://tools.ietf.org/html/rfc4180
 *
 * @package framework
 * @subpackage bulkloading
 *
 * @todo Support for deleting existing records not matched in the import
 * (through relation checks)
 */
class CsvBulkLoader extends BulkLoader {

    /**
     * Delimiter character (Default: comma).
     *
     * @var string
     */
    public $delimiter = ',';

    /**
     * Enclosure character (Default: doublequote)
     *
     * @var string
     */
    public $enclosure = '"';

    /**
     * Identifies if the CSV has a header row.
     *
     * @var boolean
     */
    public $hasHeaderRow = true;

    /**
     * Maximum number of data lines written to each chunk when a large CSV
     * file is split up for processing (see {@link splitFile()}).
     *
     * @var int
     *
     * @config
     */
    private static $lines = 1000;

    /**
     * Runs the import in preview mode by delegating to {@link processAll()}
     * with the preview flag set.
     *
     * @inheritDoc
     */
    public function preview($filepath) {
        return $this->processAll($filepath, true);
    }

    /**
     * Splits the given file into chunks, processes every chunk and merges the
     * per-chunk results into a single result object.
     *
     * Each chunk file is deleted once it has been processed. If processing a
     * chunk throws, a message naming that chunk is printed, the chunks that
     * were never reached are deleted, and whatever was accumulated so far is
     * returned.
     *
     * @param string $filepath
     * @param boolean $preview When true, nothing is written to the database
     *
     * @return null|BulkLoader_Result Null when no chunk was processed successfully
     */
    protected function processAll($filepath, $preview = false) {
        $files = $this->splitFile($filepath);

        // Chunks that have not been started yet; used for cleanup on failure.
        $pending = $files;

        $result = null;
        $last = null;

        try {
            foreach ($files as $file) {
                $last = $file;
                array_shift($pending);

                // Pass the caller's preview flag through; a hard-coded false
                // here would make preview() write records.
                $next = $this->processChunk($file, $preview);

                if ($result instanceof BulkLoader_Result) {
                    $result->merge($next);
                } else {
                    $result = $next;
                }

                @unlink($file);
            }
        } catch (Exception $e) {
            print "Failed to parse {$last}\n";

            // The failing chunk is left in place (it is named in the message
            // above), but the ones after it will never be processed, so remove
            // them rather than leaking temp files.
            foreach ($pending as $leftover) {
                @unlink($leftover);
            }
        }

        return $result;
    }

    /**
     * Splits a large file up into many smaller files.
     *
     * When {@link $hasHeaderRow} is set, the first line of the source file is
     * repeated at the top of every chunk. At least one chunk file is always
     * created, even for an empty source file.
     *
     * Note: the file is split on physical lines (fgets()), so a quoted CSV
     * field that contains a line break may end up divided between two chunks.
     *
     * @param string $path Path to large file to split
     * @param int $lines Number of lines per file; falls back to the "lines"
     *                   config value when not an integer
     *
     * @return array List of file paths
     *
     * @throws RuntimeException If the source file or a chunk file cannot be opened
     */
    protected function splitFile($path, $lines = null) {
        $previous = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', true);

        if (!is_int($lines)) {
            $lines = $this->config()->get("lines");
        }
        // Anything below one behaves like "one line per chunk".
        $lines = max(1, (int) $lines);

        $files = array();
        $from = false;
        $to = null;
        $failure = null;

        // try/catch + rethrow stands in for "finally" so that handles are
        // closed and the ini setting is restored on every path.
        try {
            $from = fopen($path, 'r');
            if ($from === false) {
                // Without this check, feof() on an invalid handle never
                // reports EOF and the loop below would not terminate.
                throw new RuntimeException("Unable to open CSV file for reading: {$path}");
            }

            $header = '';
            if ($this->hasHeaderRow) {
                $header = fgets($from);
                if ($header === false) {
                    // Empty source file: there is no header to repeat.
                    $header = '';
                }
            }

            $new = $this->getNewSplitFileName();
            $to = $this->openSplitFile($new, $header);
            $files[] = $new;

            $count = 0;

            while (!feof($from)) {
                $line = fgets($from);
                if ($line === false) {
                    // EOF was hit right after the final line break.
                    break;
                }

                if ($to === null) {
                    // The previous chunk is full: start the next one only now
                    // that there is a line to put into it, so no header-only
                    // trailing chunk is produced.
                    $new = $this->getNewSplitFileName();
                    $to = $this->openSplitFile($new, $header);
                    $files[] = $new;
                }

                fwrite($to, $line);
                $count++;

                if ($count >= $lines) {
                    fclose($to);
                    $to = null;
                    $count = 0;
                }
            }
        } catch (Exception $e) {
            $failure = $e;
        }

        if ($to) {
            fclose($to);
        }
        if ($from) {
            fclose($from);
        }
        ini_set('auto_detect_line_endings', $previous);

        if ($failure !== null) {
            // The caller never receives the list, so remove what was created.
            foreach ($files as $created) {
                @unlink($created);
            }
            throw $failure;
        }

        return $files;
    }

    /**
     * Creates a chunk file and writes the header line (if any) to it.
     *
     * @param string $name Path of the chunk file to create
     * @param string $header Header line to repeat; empty string for none
     *
     * @return resource Open handle positioned after the header
     *
     * @throws RuntimeException If the chunk file cannot be opened
     */
    private function openSplitFile($name, $header) {
        $handle = fopen($name, 'w+');
        if ($handle === false) {
            throw new RuntimeException("Unable to open temporary file for writing: {$name}");
        }

        if ($header !== '') {
            fwrite($handle, $header);
        }

        return $handle;
    }

    /**
     * Builds a unique path inside TEMP_FOLDER for a chunk file.
     *
     * @return string
     */
    protected function getNewSplitFileName() {
        return TEMP_FOLDER . '/' . uniqid('BulkLoader', true) . '.csv';
    }

    /**
     * Parses a single chunk file and feeds every row to {@link processRecord()}.
     *
     * @param string $filepath
     * @param boolean $preview
     *
     * @return BulkLoader_Result
     */
    protected function processChunk($filepath, $preview = false) {
        $results = new BulkLoader_Result();

        $csv = new CSVParser(
            $filepath,
            $this->delimiter,
            $this->enclosure
        );

        // ColumnMap has two uses, depending on whether hasHeaderRow is set
        if ($this->columnMap) {
            $map = array();

            // if the map goes to a callback, use the same key value as the map
            // value, rather than function name as multiple keys may use the
            // same callback
            foreach ($this->columnMap as $k => $v) {
                $map[$k] = (strpos($v, "->") === 0) ? $k : $v;
            }

            if ($this->hasHeaderRow) {
                $csv->mapColumns($map);
            } else {
                $csv->provideHeaderRow($map);
            }
        }

        foreach ($csv as $row) {
            $this->processRecord($row, $this->columnMap, $results, $preview);
        }

        return $results;
    }

    /**
     * Imports a single CSV row: finds or creates the target object, resolves
     * relations, applies the column values and records the outcome.
     *
     * The $columnMap argument is only handed on to
     * {@link findExistingObject()}; callback lookups use $this->columnMap.
     *
     * @todo Better messages for relation checks and duplicate detection
     *
     * @param array $record
     * @param array $columnMap
     * @param BulkLoader_Result $results
     * @param boolean $preview When true, no object is written and the column
     *                         values are not applied
     *
     * @return int ID of the processed object
     */
    protected function processRecord($record, $columnMap, &$results, $preview = false) {
        $class = $this->objectClass;

        // find existing object, or create new one
        $existingObj = $this->findExistingObject($record, $columnMap);
        $obj = ($existingObj) ? $existingObj : new $class();

        // first run: find/create any relations and store them on the object
        // we can't combine runs, as other columns might rely on the relation being present
        foreach ($record as $fieldName => $val) {
            // don't bother querying if value is not set
            if ($this->isNullValue($val)) {
                continue;
            }

            if (isset($this->relationCallbacks[$fieldName])) {
                // trigger custom search method for finding a relation based on the given value
                // and write it back to the relation (or create a new object)
                $relationName = $this->relationCallbacks[$fieldName]['relationname'];
                $callback = $this->relationCallbacks[$fieldName]['callback'];

                // Reset per field so a relation found for an earlier column is
                // never reused when this column's callback does not exist.
                $relationObj = null;
                if ($this->hasMethod($callback)) {
                    $relationObj = $this->{$callback}($obj, $val, $record);
                } elseif ($obj->hasMethod($callback)) {
                    $relationObj = $obj->{$callback}($val, $record);
                }

                if (!$relationObj || !$relationObj->exists()) {
                    $relationClass = $obj->hasOneComponent($relationName);
                    $relationObj = new $relationClass();
                    // write if we aren't previewing
                    if (!$preview) {
                        $relationObj->write();
                    }
                }

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            } elseif (strpos($fieldName, '.') !== false) {
                // we have a relation column with dot notation; only the
                // relation name (the part before the first dot) is needed here
                $parts = explode('.', $fieldName, 2);
                $relationName = $parts[0];

                // always gives us a component (either empty or existing)
                $relationObj = $obj->getComponent($relationName);
                if (!$preview) {
                    $relationObj->write();
                }

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            }
        }

        // second run: save data (skipped entirely when previewing)
        if (!$preview) {
            foreach ($record as $fieldName => $val) {
                // look up the mapping to see if this needs to map to callback
                $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

                if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                    $funcName = substr($this->columnMap[$fieldName], 2);
                    $this->$funcName($obj, $val, $record);
                } elseif ($obj->hasMethod("import{$fieldName}")) {
                    $obj->{"import{$fieldName}"}($val, $record);
                } else {
                    $obj->update(array($fieldName => $val));
                }
            }

            // write record
            $obj->write();
        }

        // @todo better message support
        $message = '';

        // save to results
        if ($existingObj) {
            $results->addUpdated($obj, $message);
        } else {
            $results->addCreated($obj, $message);
        }

        $objID = $obj->ID;

        $obj->destroy();

        // memory usage
        unset($existingObj);
        unset($obj);

        return $objID;
    }

    /**
     * Find an existing object based on one or more uniqueness columns
     * specified via {@link self::$duplicateChecks}.
     *
     * Each entry is either a field name (string), looked up with a filter on
     * the object class, or an array with a 'callback' key naming a method on
     * this importer or on the object class. The first match wins.
     *
     * @param array $record CSV data column
     *
     * @return mixed The matching record, or false when none was found
     */
    public function findExistingObject($record) {
        $SNG_objectClass = singleton($this->objectClass);

        // checking for existing records (only if not already found)
        foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
            $existingRecord = null;

            if (is_string($duplicateCheck)) {
                // Skip current duplicate check if field value is empty
                if (empty($record[$duplicateCheck])) {
                    continue;
                }

                // Check existing record with this value
                $dbFieldValue = $record[$duplicateCheck];
                $existingRecord = DataObject::get($this->objectClass)
                    ->filter($duplicateCheck, $dbFieldValue)
                    ->first();
            } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                $callback = $duplicateCheck['callback'];
                // The row may not contain the checked column at all.
                $value = isset($record[$fieldName]) ? $record[$fieldName] : null;

                if ($this->hasMethod($callback)) {
                    $existingRecord = $this->{$callback}($value, $record);
                } elseif ($SNG_objectClass->hasMethod($callback)) {
                    $existingRecord = $SNG_objectClass->{$callback}($value, $record);
                } else {
                    user_error("CsvBulkLoader::findExistingObject():"
                        . " {$callback} not found on importer or object class.", E_USER_ERROR);
                }
            } else {
                user_error('CsvBulkLoader::findExistingObject(): Wrong format for $duplicateChecks', E_USER_ERROR);
            }

            if ($existingRecord) {
                return $existingRecord;
            }
        }

        return false;
    }

    /**
     * Determine whether any loaded files should be parsed with a
     * header-row (otherwise we rely on {@link self::$columnMap}).
     *
     * @return boolean True when {@link $hasHeaderRow} is set or a column map
     *                 has been provided
     */
    public function hasHeaderRow() {
        return ($this->hasHeaderRow || isset($this->columnMap));
    }
}