2008-08-09 06:53:34 +02:00
|
|
|
<?php
|
|
|
|
/**
 * Uses the fgetcsv() function to process CSV input.
 * The input is expected to be UTF8.
 *
 * @see http://rfc.net/rfc4180.html
 * @package cms
 * @subpackage bulkloading
 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
 *
 * @todo Support for deleting existing records not matched in the import (through relation checks)
 */
class CsvBulkLoader extends BulkLoader {

	/**
	 * Delimiter character (Default: comma).
	 *
	 * @var string
	 */
	public $delimiter = ',';

	/**
	 * Enclosure character (Default: doublequote).
	 *
	 * @var string
	 */
	public $enclosure = '"';

	/**
	 * Identifies if the loaded file has a header row.
	 * If a {@link self::$columnMap} is passed, we assume
	 * the file has no header row, unless explicitly noted.
	 *
	 * @var boolean
	 */
	public $hasHeaderRow = false;

	/**
	 * Reads the CSV file row by row and hands every data row to
	 * {@link processRecord()}.
	 *
	 * The column mapping is determined as follows:
	 * - header row flagged AND column map given: the first row is read and each
	 *   (trimmed) header is looked up in the column map; unknown headers are
	 *   mapped to null
	 * - only a column map given: the map is used as-is, the first row is data
	 * - neither: the first row is read and used as both column and field names
	 *
	 * @param string $filepath Path of the CSV file to read
	 * @param boolean $preview Passed through to {@link processRecord()}
	 * @return BulkLoader_Result|false False if the file could not be opened
	 */
	protected function processAll($filepath, $preview = false) {
		ini_set('auto_detect_line_endings', 1);

		$file = fopen($filepath, 'r');
		if(!$file) return false;

		$results = new BulkLoader_Result();

		if($this->hasHeaderRow && $this->columnMap) {
			$columnRow = fgetcsv($file, 0, $this->delimiter, $this->enclosure);
			// fgetcsv() returns false on an empty file: nothing to import
			if(!is_array($columnRow)) {
				fclose($file);
				return $results;
			}

			$columnMap = array();
			foreach($columnRow as $origColumnName) {
				$origColumnName = trim($origColumnName);
				if(isset($this->columnMap[$origColumnName])) {
					$columnMap[$origColumnName] = $this->columnMap[$origColumnName];
				} else {
					$columnMap[$origColumnName] = null;
				}
			}
		} elseif($this->columnMap) {
			$columnMap = $this->columnMap;
		} else {
			// assuming that first row is column naming if no columnmap is passed
			$columnRow = fgetcsv($file, 0, $this->delimiter, $this->enclosure);
			// fgetcsv() returns false on an empty file: nothing to import
			if(!is_array($columnRow)) {
				fclose($file);
				return $results;
			}

			$columnMap = array_combine($columnRow, $columnRow);
		}

		$rowIndex = 0;
		while(($row = fgetcsv($file, 0, $this->delimiter, $this->enclosure)) !== false) {
			$rowIndex++;

			// fgetcsv() reports a blank line as an array with a single null field;
			// it carries no data, so don't create a record for it
			if($row === array(null)) continue;

			$indexedRow = array();
			foreach($columnMap as $origColumnName => $fieldName) {
				// in case the row has less fields than the columnmap,
				// ignore the "leftover" mappings
				if(!isset($row[count($indexedRow)])) {
					user_error("CsvBulkLoader::processAll(): Columns in row {$rowIndex} don't match the \$columnMap", E_USER_NOTICE);
					continue;
				}

				// count($indexedRow) is the position of the next unread field in $row
				$indexedRow[$origColumnName] = $row[count($indexedRow)];
			}

			$this->processRecord($indexedRow, $columnMap, $results, $preview);
		}

		fclose($file);

		return $results;
	}

	/**
	 * Imports a single CSV row: finds or creates the target object, resolves
	 * relations in a first pass, writes the column values in a second pass and
	 * registers the object with the result set.
	 *
	 * NOTE(review): only the two writes of the main object outside the relation
	 * handling are skipped when $preview is set; relation objects (and the main
	 * object inside the relation branches) are still written.
	 *
	 * @todo Better messages for relation checks and duplicate detection
	 *
	 * @param array $record Row data, keyed by original CSV column name
	 * @param array $columnMap Map of original CSV column name to field name (null = column is ignored)
	 * @param BulkLoader_Result $results Receives the created/updated object
	 * @param boolean $preview If true, the main object is not written in the end
	 */
	protected function processRecord($record, $columnMap, &$results, $preview = false) {
		$class = $this->objectClass;

		// find existing object, or create new one
		$existingObj = $this->findExistingObject($record, $columnMap);
		$obj = ($existingObj) ? $existingObj : new $class();

		// first run: find/create any relations and store them on the object
		// we can't combine runs, as other columns might rely on the relation being present
		foreach($record as $origColumnName => $val) {
			$fieldName = $columnMap[$origColumnName];

			// columns without a mapping (see processAll()) are not imported
			if($fieldName === null) continue;

			// don't bother querying if value is not set
			if($this->isNullValue($val, $fieldName)) continue;

			// checking for existing relations
			if(isset($this->relationCallbacks[$fieldName])) {
				// trigger custom search method for finding a relation based on the given value
				// and write it back to the relation (or create a new object)
				$relationName = $this->relationCallbacks[$fieldName]['relationname'];
				$relationObj = $obj->{$this->relationCallbacks[$fieldName]['callback']}($val, $record);
				if(!$relationObj || !$relationObj->exists()) {
					$relationClass = $obj->has_one($relationName);
					$relationObj = new $relationClass();
					$relationObj->write();
				}
				$obj->setComponent($relationName, $relationObj);
				$obj->{"{$relationName}ID"} = $relationObj->ID;
				$obj->write();
			} elseif(strpos($fieldName, '.') !== false) {
				// we have a relation column with dot notation;
				// only the relation name (part before the first dot) is needed in this run
				list($relationName) = explode('.', $fieldName);
				$relationObj = $obj->getComponent($relationName); // always gives us a component (either empty or existing)
				$obj->setComponent($relationName, $relationObj);
				$relationObj->write();
				$obj->{"{$relationName}ID"} = $relationObj->ID;
				$obj->write();
			}

			$obj->flushCache(); // avoid relation caching confusion
		}

		if(!$preview) $obj->write();

		// second run: save data
		foreach($record as $origColumnName => $val) {
			$fieldName = $columnMap[$origColumnName];

			// columns without a mapping (see processAll()) are not imported
			if($fieldName === null) continue;

			if($this->isNullValue($val, $fieldName)) continue;

			if($obj->hasMethod("import{$fieldName}")) {
				// custom importer method on the object takes precedence
				$obj->{"import{$fieldName}"}($val, $record);
			} elseif(strpos($fieldName, '.') !== false) {
				// we have a relation column ("<relation>.<column>")
				list($relationName, $columnName) = explode('.', $fieldName);
				$relationObj = $obj->getComponent($relationName);
				$relationObj->{$columnName} = $val;
				$relationObj->write();
				$obj->flushCache(); // avoid relation caching confusion
			} else {
				// plain old value setter
				$obj->{$fieldName} = $val;
			}
		}

		// write record
		if(!$preview) $obj->write();

		// @todo better message support
		$message = '';

		// save to results
		if($existingObj) {
			$results->addUpdated($obj, $message);
		} else {
			$results->addCreated($obj, $message);
		}

		// memory usage
		unset($existingObj);
		unset($obj);
	}

	/**
	 * Find an existing object based on one or more uniqueness
	 * columns specified via {@link self::$duplicateChecks}.
	 *
	 * Each check is either a string (the database column to compare the
	 * record value against) or an array with a 'callback' key naming a method
	 * on a singleton of the object class, called with the value and the record.
	 * The first check that yields a record wins.
	 *
	 * @param array $record CSV data column
	 * @param array $columnMap
	 * @return mixed The record found by DataObject::get_one() or by the callback, otherwise false
	 */
	public function findExistingObject($record, $columnMap) {
		// checking for existing records (only if not already found)
		foreach($this->duplicateChecks as $fieldName => $duplicateCheck) {
			if(is_string($duplicateCheck)) {
				// the row doesn't carry this identifier: skip this check,
				// but still give the remaining checks a chance to match
				if(!isset($record[$fieldName])) continue;

				// both parts end up in a hand-built WHERE clause, so both need escaping
				$SQL_fieldName = Convert::raw2sql($duplicateCheck);
				$SQL_fieldValue = Convert::raw2sql($record[$fieldName]);
				$existingRecord = DataObject::get_one($this->objectClass, "`$SQL_fieldName` = '{$SQL_fieldValue}'");
				if($existingRecord) return $existingRecord;
			} elseif(is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
				// pass null instead of raising a notice if the column is missing in this row
				$checkValue = isset($record[$fieldName]) ? $record[$fieldName] : null;
				$existingRecord = singleton($this->objectClass)->{$duplicateCheck['callback']}($checkValue, $record);
				if($existingRecord) return $existingRecord;
			} else {
				user_error('CsvBulkLoader:processRecord: Wrong format for $duplicateChecks', E_USER_ERROR);
			}
		}

		return false;
	}

	/**
	 * Determine whether any loaded files should be parsed
	 * with a header row (otherwise we rely on {@link self::$columnMap}).
	 *
	 * Mirrors the branching in {@link processAll()}: the first row is read as
	 * a header if that is explicitly flagged, or if no column map is available.
	 *
	 * @return boolean
	 */
	public function hasHeaderRow() {
		return ($this->hasHeaderRow || !$this->columnMap);
	}

}
|
|
|
|
?>
|