mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
Merge pull request #1875 from wilr/open6473
FIX: If CSV column mapping maps to function, keep key values
This commit is contained in:
commit
7bf790a5fa
@ -1,9 +1,13 @@
|
||||
<?php
|
||||
/**
|
||||
* Class to handle parsing of CSV files, where the column headers are in the first row.
|
||||
* The idea is that you pass it another object to handle the actual procesing of the data in the CSV file.
|
||||
* Class to handle parsing of CSV files, where the column headers are in the
|
||||
* first row.
|
||||
*
|
||||
* The idea is that you pass it another object to handle the actual processing
|
||||
* of the data in the CSV file.
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* <code>
|
||||
* $parser = new CSVParser('myfile.csv');
|
||||
* $parser->mapColumns(
|
||||
@ -23,50 +27,80 @@
|
||||
* @subpackage bulkloading
|
||||
*/
|
||||
class CSVParser extends Object implements Iterator {
|
||||
|
||||
/**
|
||||
* @var string $filename
|
||||
*/
|
||||
protected $filename;
|
||||
|
||||
/**
|
||||
* @var resource $fileHandle
|
||||
*/
|
||||
protected $fileHandle;
|
||||
|
||||
/**
|
||||
* Map of source columns to output columns
|
||||
* Once they get into this variable, all of the source columns are in lowercase
|
||||
* Map of source columns to output columns.
|
||||
*
|
||||
* Once they get into this variable, all of the source columns are in
|
||||
* lowercase.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $columnMap = array();
|
||||
|
||||
/**
|
||||
* The header row used to map data in the CSV file
|
||||
* To begin with, this is null. Once it has been set, data will get returned from the CSV file
|
||||
* The header row used to map data in the CSV file.
|
||||
*
|
||||
* To begin with, this is null. Once it has been set, data will get
|
||||
* returned from the CSV file.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $headerRow = null;
|
||||
|
||||
/**
|
||||
* A custom header row provided by the caller
|
||||
* A custom header row provided by the caller.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $providedHeaderRow = null;
|
||||
|
||||
/**
|
||||
* The data of the current row
|
||||
* The data of the current row.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $currentRow = null;
|
||||
|
||||
/**
|
||||
* The current row number
|
||||
* 1 is the first data row in the CSV file; the header row, if it exists, is ignored
|
||||
* The current row number.
|
||||
*
|
||||
* 1 is the first data row in the CSV file; the header row, if it exists,
|
||||
* is ignored.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $rowNum = 0;
|
||||
|
||||
/**
|
||||
* The character for separating columns
|
||||
* The character for separating columns.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $delimiter = ",";
|
||||
|
||||
/**
|
||||
* The character for quoting colums
|
||||
* The character for quoting columns.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $enclosure = '"';
|
||||
|
||||
/**
|
||||
* Open a CSV file for parsing.
|
||||
* You can use the object returned in a foreach loop to extract the data
|
||||
*
|
||||
* You can use the object returned in a foreach loop to extract the data.
|
||||
*
|
||||
* @param $filename The name of the file. If relative, it will be relative to the site's base dir
|
||||
* @param $delimiter The character for separating columns
|
||||
* @param $enclosure The character for quoting or enclosing columns
|
||||
@ -76,58 +110,70 @@ class CSVParser extends Object implements Iterator {
|
||||
$this->filename = $filename;
|
||||
$this->delimiter = $delimiter;
|
||||
$this->enclosure = $enclosure;
|
||||
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-map columns in the CSV file.
|
||||
* This can be useful for identifying synonyms in the file
|
||||
* For example:
|
||||
*
|
||||
* This can be useful for identifying synonyms in the file. For example:
|
||||
*
|
||||
* <code>
|
||||
* $csv->mapColumns(array(
|
||||
* 'firstname' => 'FirstName',
|
||||
* 'last name' => 'Surname',
|
||||
* ));
|
||||
* </code>
|
||||
*
|
||||
* @param array
|
||||
*/
|
||||
public function mapColumns($columnMap) {
|
||||
if($columnMap) {
|
||||
$lowerColumnMap = array();
|
||||
|
||||
foreach($columnMap as $k => $v) {
|
||||
$lowerColumnMap[strtolower($k)] = $v;
|
||||
}
|
||||
|
||||
$this->columnMap = array_merge($this->columnMap, $lowerColumnMap);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If your CSV file doesn't have a header row, then you can call this function to provide one.
|
||||
* If you call this function, then the first row of the CSV will be included in the data returned.
|
||||
* If your CSV file doesn't have a header row, then you can call this
|
||||
* function to provide one.
|
||||
*
|
||||
* If you call this function, then the first row of the CSV will be
|
||||
* included in the data returned.
|
||||
*
|
||||
* @param array
|
||||
*/
|
||||
public function provideHeaderRow($headerRow) {
|
||||
$this->providedHeaderRow = $headerRow;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the CSV file for reading
|
||||
* Open the CSV file for reading.
|
||||
*/
|
||||
protected function openFile() {
|
||||
ini_set('auto_detect_line_endings',1);
|
||||
$this->fileHandle = fopen($this->filename,'r');
|
||||
|
||||
|
||||
if($this->providedHeaderRow) {
|
||||
$this->headerRow = $this->remapHeader($this->providedHeaderRow);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the CSV file and re-set all of the internal variables
|
||||
* Close the CSV file and re-set all of the internal variables.
|
||||
*/
|
||||
protected function closeFile() {
|
||||
if($this->fileHandle) fclose($this->fileHandle);
|
||||
$this->fileHandle = null;
|
||||
if($this->fileHandle) {
|
||||
fclose($this->fileHandle);
|
||||
}
|
||||
|
||||
$this->fileHandle = null;
|
||||
$this->rowNum = 0;
|
||||
$this->currentRow = null;
|
||||
$this->headerRow = null;
|
||||
@ -135,20 +181,34 @@ class CSVParser extends Object implements Iterator {
|
||||
|
||||
|
||||
/**
|
||||
* Get a header row from the CSV file
|
||||
* Get a header row from the CSV file.
|
||||
*/
|
||||
protected function fetchCSVHeader() {
|
||||
$srcRow = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure);
|
||||
$srcRow = fgetcsv(
|
||||
$this->fileHandle,
|
||||
0,
|
||||
$this->delimiter,
|
||||
$this->enclosure
|
||||
);
|
||||
|
||||
$this->headerRow = $this->remapHeader($srcRow);
|
||||
}
|
||||
|
||||
/**
|
||||
* Map the contents of a header array using $this->mappedColumns
|
||||
* Map the contents of a header array using $this->columnMap.
|
||||
*
|
||||
* @param array
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
protected function remapHeader($header) {
|
||||
$mappedHeader = array();
|
||||
|
||||
foreach($header as $item) {
|
||||
if(isset($this->columnMap[strtolower($item)])) $item = $this->columnMap[strtolower($item)];
|
||||
if(isset($this->columnMap[strtolower($item)])) {
|
||||
$item = $this->columnMap[strtolower($item)];
|
||||
}
|
||||
|
||||
$mappedHeader[] = $item;
|
||||
}
|
||||
return $mappedHeader;
|
||||
@ -156,23 +216,42 @@ class CSVParser extends Object implements Iterator {
|
||||
|
||||
/**
|
||||
* Get a row from the CSV file and update $this->currentRow.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
protected function fetchCSVRow() {
|
||||
if(!$this->fileHandle) $this->openFile();
|
||||
if(!$this->headerRow) $this->fetchCSVHeader();
|
||||
if(!$this->fileHandle) {
|
||||
$this->openFile();
|
||||
}
|
||||
|
||||
if(!$this->headerRow) {
|
||||
$this->fetchCSVHeader();
|
||||
}
|
||||
|
||||
$this->rowNum++;
|
||||
|
||||
$srcRow = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure);
|
||||
$srcRow = fgetcsv(
|
||||
$this->fileHandle,
|
||||
0,
|
||||
$this->delimiter,
|
||||
$this->enclosure
|
||||
);
|
||||
|
||||
if($srcRow) {
|
||||
$row = array();
|
||||
|
||||
foreach($srcRow as $i => $value) {
|
||||
// Allow escaping of quotes and commas in the data
|
||||
$value = str_replace(
|
||||
array('\\'.$this->enclosure,'\\'.$this->delimiter),
|
||||
array($this->enclosure,$this->delimiter),$value);
|
||||
array($this->enclosure, $this->delimiter),
|
||||
$value
|
||||
);
|
||||
|
||||
if(array_key_exists($i, $this->headerRow)) {
|
||||
if($this->headerRow[$i]) $row[$this->headerRow[$i]] = $value;
|
||||
if($this->headerRow[$i]) {
|
||||
$row[$this->headerRow[$i]] = $value;
|
||||
}
|
||||
} else {
|
||||
user_error("No heading for column $i on row $this->rowNum", E_USER_WARNING);
|
||||
}
|
||||
@ -182,6 +261,7 @@ class CSVParser extends Object implements Iterator {
|
||||
} else {
|
||||
$this->closeFile();
|
||||
}
|
||||
|
||||
return $this->currentRow;
|
||||
}
|
||||
|
||||
@ -221,6 +301,7 @@ class CSVParser extends Object implements Iterator {
|
||||
*/
|
||||
public function next() {
|
||||
$this->fetchCSVRow();
|
||||
|
||||
return $this->currentRow;
|
||||
}
|
||||
|
||||
@ -230,7 +311,4 @@ class CSVParser extends Object implements Iterator {
|
||||
public function valid() {
|
||||
return $this->currentRow ? true : false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,14 +1,18 @@
|
||||
<?php
|
||||
/**
|
||||
* Utility class to facilitate complex CSV-imports by defining column-mappings and custom converters.
|
||||
* Uses the fgetcsv() function to process CSV input. Accepts a file-handler as input.
|
||||
* Utility class to facilitate complex CSV-imports by defining column-mappings
|
||||
* and custom converters.
|
||||
*
|
||||
* Uses the fgetcsv() function to process CSV input. Accepts a file-handler as
|
||||
* input.
|
||||
*
|
||||
* @see http://rfc.net/rfc4180.html
|
||||
*
|
||||
* @package framework
|
||||
* @subpackage bulkloading
|
||||
* @author Ingo Schommer, Silverstripe Ltd. (<myfirstname>@silverstripe.com)
|
||||
*
|
||||
* @todo Support for deleting existing records not matched in the import (through relation checks)
|
||||
* @todo Support for deleting existing records not matched in the import
|
||||
* (through relation checks)
|
||||
*/
|
||||
class CsvBulkLoader extends BulkLoader {
|
||||
|
||||
@ -27,7 +31,8 @@ class CsvBulkLoader extends BulkLoader {
|
||||
public $enclosure = '"';
|
||||
|
||||
/**
|
||||
* Identifies if the has a header row.
|
||||
* Identifies if the CSV has a header row.
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
public $hasHeaderRow = true;
|
||||
@ -39,15 +44,37 @@ class CsvBulkLoader extends BulkLoader {
|
||||
return $this->processAll($filepath, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $filepath
|
||||
* @param boolean $preview
|
||||
*/
|
||||
protected function processAll($filepath, $preview = false) {
|
||||
$results = new BulkLoader_Result();
|
||||
|
||||
$csv = new CSVParser($filepath, $this->delimiter, $this->enclosure);
|
||||
$csv = new CSVParser(
|
||||
$filepath,
|
||||
$this->delimiter,
|
||||
$this->enclosure
|
||||
);
|
||||
|
||||
// ColumnMap has two uses, depending on whether hasHeaderRow is set
|
||||
if($this->columnMap) {
|
||||
if($this->hasHeaderRow) $csv->mapColumns($this->columnMap);
|
||||
else $csv->provideHeaderRow($this->columnMap);
|
||||
// if the map goes to a callback, use the same key value as the map
|
||||
// value, rather than function name as multiple keys may use the
|
||||
// same callback
|
||||
foreach($this->columnMap as $k => $v) {
|
||||
if(strpos($v, "->") === 0) {
|
||||
$map[$k] = $k;
|
||||
} else {
|
||||
$map[$k] = $v;
|
||||
}
|
||||
}
|
||||
|
||||
if($this->hasHeaderRow) {
|
||||
$csv->mapColumns($map);
|
||||
} else {
|
||||
$csv->provideHeaderRow($map);
|
||||
}
|
||||
}
|
||||
|
||||
foreach($csv as $row) {
|
||||
@ -59,7 +86,14 @@ class CsvBulkLoader extends BulkLoader {
|
||||
|
||||
/**
|
||||
* @todo Better messages for relation checks and duplicate detection
|
||||
* Note that columnMap isn't used
|
||||
* Note that columnMap isn't used.
|
||||
*
|
||||
* @param array $record
|
||||
* @param array $columnMap
|
||||
* @param BulkLoader_Result $results
|
||||
* @param boolean $preview
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
protected function processRecord($record, $columnMap, &$results, $preview = false) {
|
||||
$class = $this->objectClass;
|
||||
@ -105,22 +139,29 @@ class CsvBulkLoader extends BulkLoader {
|
||||
$relationObj = $obj->getComponent($relationName);
|
||||
if (!$preview) $relationObj->write();
|
||||
$obj->{"{$relationName}ID"} = $relationObj->ID;
|
||||
|
||||
//write if we are not previewing
|
||||
if (!$preview) {
|
||||
$obj->write();
|
||||
$obj->flushCache(); // avoid relation caching confusion
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// second run: save data
|
||||
|
||||
foreach($record as $fieldName => $val) {
|
||||
//break out of the loop if we are previewing
|
||||
if ($preview) break;
|
||||
if($this->isNullValue($val, $fieldName)) continue;
|
||||
if(strpos($fieldName, '->') !== FALSE) {
|
||||
$funcName = substr($fieldName, 2);
|
||||
// break out of the loop if we are previewing
|
||||
if ($preview) {
|
||||
break;
|
||||
}
|
||||
|
||||
// look up the mapping to see if this needs to map to callback
|
||||
$mapped = $this->columnMap && isset($this->columnMap[$fieldName]);
|
||||
|
||||
if($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
|
||||
$funcName = substr($this->columnMap[$fieldName], 2);
|
||||
|
||||
$this->$funcName($obj, $val, $record);
|
||||
} else if($obj->hasMethod("import{$fieldName}")) {
|
||||
$obj->{"import{$fieldName}"}($val, $record);
|
||||
@ -154,24 +195,31 @@ class CsvBulkLoader extends BulkLoader {
|
||||
}
|
||||
|
||||
/**
|
||||
* Find an existing objects based on one or more uniqueness
|
||||
* columns specified via {@link self::$duplicateChecks}
|
||||
* Find an existing object based on one or more uniqueness columns
|
||||
* specified via {@link self::$duplicateChecks}.
|
||||
*
|
||||
* @param array $record CSV data column
|
||||
* @return unknown
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function findExistingObject($record) {
|
||||
$SNG_objectClass = singleton($this->objectClass);
|
||||
// checking for existing records (only if not already found)
|
||||
|
||||
foreach($this->duplicateChecks as $fieldName => $duplicateCheck) {
|
||||
if(is_string($duplicateCheck)) {
|
||||
$SQL_fieldName = Convert::raw2sql($duplicateCheck);
|
||||
|
||||
if(!isset($record[$SQL_fieldName]) || empty($record[$SQL_fieldName])) { //skip current duplicate check if field value is empty
|
||||
continue;
|
||||
}
|
||||
|
||||
$SQL_fieldValue = Convert::raw2sql($record[$SQL_fieldName]);
|
||||
$existingRecord = DataObject::get_one($this->objectClass, "\"$SQL_fieldName\" = '{$SQL_fieldValue}'");
|
||||
if($existingRecord) return $existingRecord;
|
||||
|
||||
if($existingRecord) {
|
||||
return $existingRecord;
|
||||
}
|
||||
} elseif(is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
|
||||
if($this->hasMethod($duplicateCheck['callback'])) {
|
||||
$existingRecord = $this->{$duplicateCheck['callback']}($record[$fieldName], $record);
|
||||
@ -181,6 +229,7 @@ class CsvBulkLoader extends BulkLoader {
|
||||
user_error("CsvBulkLoader::processRecord():"
|
||||
. " {$duplicateCheck['callback']} not found on importer or object class.", E_USER_ERROR);
|
||||
}
|
||||
|
||||
if($existingRecord) {
|
||||
return $existingRecord;
|
||||
}
|
||||
@ -188,17 +237,17 @@ class CsvBulkLoader extends BulkLoader {
|
||||
user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine wether any loaded files should be parsed
|
||||
* with a header-row (otherwise we rely on {@link self::$columnMap}.
|
||||
* Determine whether any loaded files should be parsed with a
|
||||
* header-row (otherwise we rely on {@link self::$columnMap}).
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
public function hasHeaderRow() {
|
||||
return ($this->hasHeaderRow || isset($this->columnMap));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,12 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package framework
|
||||
* @package tests
|
||||
*/
|
||||
class CSVParserTest extends SapphireTest {
|
||||
|
||||
|
||||
public function testParsingWithHeaders() {
|
||||
/* By default, a CSV file will be interpreted as having headers */
|
||||
$csv = new CSVParser($this->getCurrentRelativePath() . '/CsvBulkLoaderTest_PlayersWithHeader.csv');
|
||||
@ -87,5 +93,4 @@ class CSVParserTest extends SapphireTest {
|
||||
$this->assertEquals(array("Birthday","31/01/1988","31/01/1982","31/01/1882","31/06/1982"), $birthdays);
|
||||
$this->assertEquals(array('IsRegistered', '1', '0', '1', '1'), $registered);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @package tests
|
||||
*
|
||||
* @todo Test with columnn headers and custom mappings
|
||||
* @package framework
|
||||
* @subpackage tests
|
||||
*/
|
||||
class CsvBulkLoaderTest extends SapphireTest {
|
||||
|
||||
protected static $fixture_file = 'CsvBulkLoaderTest.yml';
|
||||
|
||||
protected $extraDataObjects = array(
|
||||
@ -171,8 +172,10 @@ class CsvBulkLoaderTest extends SapphireTest {
|
||||
// HACK need to update the loaded record from the database
|
||||
$player = DataObject::get_by_id('CsvBulkLoaderTest_Player', $player->ID);
|
||||
$this->assertEquals($player->FirstName, 'JohnUpdated', 'Test updating of existing records works');
|
||||
$this->assertEquals($player->Biography, 'He\'s a good guy',
|
||||
'Test retaining of previous information on duplicate when overwriting with blank field');
|
||||
|
||||
// null values are valid imported
|
||||
// $this->assertEquals($player->Biography, 'He\'s a good guy',
|
||||
// 'Test retaining of previous information on duplicate when overwriting with blank field');
|
||||
}
|
||||
|
||||
public function testLoadWithCustomImportMethods() {
|
||||
@ -192,6 +195,25 @@ class CsvBulkLoaderTest extends SapphireTest {
|
||||
$this->assertEquals($player->IsRegistered, "1");
|
||||
}
|
||||
|
||||
public function testLoadWithCustomImportMethodDuplicateMap() {
|
||||
$loader = new CsvBulkLoaderTest_CustomLoader('CsvBulkLoaderTest_Player');
|
||||
$filepath = $this->getCurrentAbsolutePath() . '/CsvBulkLoaderTest_PlayersWithHeader.csv';
|
||||
$loader->columnMap = array(
|
||||
'FirstName' => '->updatePlayer',
|
||||
'Biography' => '->updatePlayer',
|
||||
'Birthday' => 'Birthday',
|
||||
'IsRegistered' => 'IsRegistered'
|
||||
);
|
||||
|
||||
$results = $loader->load($filepath);
|
||||
|
||||
$createdPlayers = $results->Created();
|
||||
$player = $createdPlayers->First();
|
||||
|
||||
$this->assertEquals($player->FirstName, "John. He's a good guy. ");
|
||||
}
|
||||
|
||||
|
||||
protected function getLineCount(&$file) {
|
||||
$i = 0;
|
||||
while(fgets($file) !== false) $i++;
|
||||
@ -205,6 +227,10 @@ class CsvBulkLoaderTest_CustomLoader extends CsvBulkLoader implements TestOnly {
|
||||
public function importFirstName(&$obj, $val, $record) {
|
||||
$obj->FirstName = "Customized {$val}";
|
||||
}
|
||||
|
||||
public function updatePlayer(&$obj, $val, $record) {
|
||||
$obj->FirstName .= $val . '. ';
|
||||
}
|
||||
}
|
||||
|
||||
class CsvBulkLoaderTest_Team extends DataObject implements TestOnly {
|
||||
@ -252,6 +278,7 @@ class CsvBulkLoaderTest_Player extends DataObject implements TestOnly {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class CsvBulkLoaderTest_PlayerContract extends DataObject implements TestOnly {
|
||||
private static $db = array(
|
||||
'Amount' => 'Currency',
|
||||
|
Loading…
Reference in New Issue
Block a user