API CHANGE: Created CSVParser class and updated CSVBulkLoader to use it

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@63563 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
Sam Minnee 2008-10-03 00:27:58 +00:00
parent 882b4bc4e1
commit cbc228f9a2
10 changed files with 343 additions and 81 deletions

234
dev/CSVParser.php Normal file
View File

@ -0,0 +1,234 @@
<?php
/**
 * Class to handle parsing of CSV files, where the column headers are in the first row.
 * The idea is that you iterate over the parser and handle the actual processing of each
 * row of the CSV file yourself.
 *
 * Usage:
 * <code>
 * $parser = new CSVParser('myfile.csv');
 * $parser->mapColumns(array(
 *    'first name' => 'FirstName',
 *    'lastname' => 'Surname',
 *    'last name' => 'Surname',
 * ));
 * foreach($parser as $row) {
 *    // $row is a map of column name => column value
 *    $obj = new MyDataObject();
 *    $obj->update($row);
 *    $obj->write();
 * }
 * </code>
 */
class CSVParser extends Object implements Iterator {
	/**
	 * Absolute path of the CSV file being parsed
	 */
	protected $filename;

	/**
	 * The open file handle, or null while the file is closed
	 */
	protected $fileHandle;

	/**
	 * Map of source columns to output columns
	 * Once they get into this variable, all of the source columns are in lowercase
	 */
	protected $columnMap = array();

	/**
	 * The header row used to map data in the CSV file
	 * To begin with, this is null. Once it has been set, data will get returned from the CSV file
	 */
	protected $headerRow = null;

	/**
	 * A custom header row provided by the caller
	 */
	protected $providedHeaderRow = null;

	/**
	 * The data of the current row
	 */
	protected $currentRow = null;

	/**
	 * The current row number
	 * 1 is the first data row in the CSV file; the header row, if it exists, is ignored
	 */
	protected $rowNum = 0;

	/**
	 * The character for separating columns
	 */
	protected $delimiter = ",";

	/**
	 * The character for quoting columns
	 */
	protected $enclosure = '"';

	/**
	 * Open a CSV file for parsing.
	 * You can use the object returned in a foreach loop to extract the data
	 *
	 * @param $filename The name of the file. If it does not start with '/', it is taken
	 *                  to be relative to the site's base dir
	 * @param $delimiter The character for separating columns
	 * @param $enclosure The character for quoting or enclosing columns
	 */
	function __construct($filename, $delimiter = ",", $enclosure = '"') {
		// substr() rather than an offset lookup so that an empty filename doesn't raise a notice
		if(substr($filename, 0, 1) != '/') $filename = Director::baseFolder() . '/' . $filename;

		$this->filename = $filename;
		// Store the caller's settings; previously these arguments were ignored and the
		// defaults were always used
		$this->delimiter = $delimiter;
		$this->enclosure = $enclosure;
	}

	/**
	 * Re-map columns in the CSV file.
	 * This can be useful for identifying synonyms in the file.
	 * The keys are matched case-insensitively against the header row; repeated calls
	 * are merged, with later mappings overriding earlier ones for the same key.
	 * For example:
	 * <code>
	 * $csv->mapColumns(array(
	 *    'firstname' => 'FirstName',
	 *    'last name' => 'Surname',
	 * ));
	 * </code>
	 *
	 * @param $columnMap Map of source column name => output column name
	 */
	function mapColumns($columnMap) {
		if($columnMap) {
			$lowerColumnMap = array();
			foreach($columnMap as $k => $v) {
				$lowerColumnMap[strtolower($k)] = $v;
			}
			$this->columnMap = array_merge($this->columnMap, $lowerColumnMap);
		}
	}

	/**
	 * If your CSV file doesn't have a header row, then you can call this function to provide one.
	 * If you call this function, then the first row of the CSV will be included in the data returned.
	 *
	 * @param $headerRow Array of column names, in the order the columns appear in the file
	 */
	function provideHeaderRow($headerRow) {
		$this->providedHeaderRow = $headerRow;
	}

	/**
	 * Open the CSV file for reading.
	 * If a header row was provided by the caller, it is installed here so that the
	 * first line of the file is treated as data.
	 */
	protected function openFile() {
		ini_set('auto_detect_line_endings',1);
		$this->fileHandle = fopen($this->filename,'r');

		if($this->providedHeaderRow) {
			$this->headerRow = $this->remapHeader($this->providedHeaderRow);
		}
	}

	/**
	 * Close the CSV file and re-set all of the internal variables
	 */
	protected function closeFile() {
		if($this->fileHandle) fclose($this->fileHandle);

		$this->fileHandle = null;
		$this->rowNum = 0;
		$this->currentRow = null;
		$this->headerRow = null;
	}

	/**
	 * Get a header row from the CSV file
	 */
	protected function fetchCSVHeader() {
		$srcRow = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure);
		// An empty file yields no header line; treat that as "no columns" rather than
		// passing a non-array to remapHeader()
		$this->headerRow = is_array($srcRow) ? $this->remapHeader($srcRow) : array();
	}

	/**
	 * Map the contents of a header array using $this->columnMap
	 *
	 * @param $header Array of column names
	 * @return Array of column names, with any name found in the column map
	 *         (compared in lowercase) replaced by its mapped name
	 */
	protected function remapHeader($header) {
		$mappedHeader = array();
		foreach($header as $item) {
			if(isset($this->columnMap[strtolower($item)])) $item = $this->columnMap[strtolower($item)];
			$mappedHeader[] = $item;
		}
		return $mappedHeader;
	}

	/**
	 * Get a row from the CSV file and update $this->currentRow.
	 * Opens the file and reads the header on demand. When no more rows can be read, the
	 * file is closed and all state is reset, so a further call starts again from the top.
	 *
	 * @return The current row as a map of column name => value, or null at the end of the file
	 */
	protected function fetchCSVRow() {
		if(!$this->fileHandle) $this->openFile();
		if(!$this->fileHandle) {
			// fopen() failed and has already raised its own warning; behave as an empty file
			$this->closeFile();
			return $this->currentRow;
		}
		if(!$this->headerRow) $this->fetchCSVHeader();

		$this->rowNum++;
		$srcRow = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure);

		if($srcRow) {
			$row = array();
			foreach($srcRow as $i => $value) {
				// Allow escaping of quotes and commas in the data
				$value = str_replace(
					array('\\'.$this->enclosure,'\\'.$this->delimiter),
					array($this->enclosure,$this->delimiter),$value);

				if(array_key_exists($i, $this->headerRow)) {
					// Columns with an empty heading are dropped from the output
					if($this->headerRow[$i]) $row[$this->headerRow[$i]] = $value;
				} else {
					user_error("No heading for column $i on row $this->rowNum", E_USER_WARNING);
				}
			}
			$this->currentRow = $row;
		} else {
			$this->closeFile();
		}

		return $this->currentRow;
	}

	/**
	 * @ignore
	 */
	function __destruct() {
		$this->closeFile();
	}

	//// ITERATOR FUNCTIONS

	/**
	 * @ignore
	 */
	function rewind() {
		$this->closeFile();
		$this->fetchCSVRow();
	}

	/**
	 * @ignore
	 */
	function current() {
		return $this->currentRow;
	}

	/**
	 * @ignore
	 */
	function key() {
		return $this->rowNum;
	}

	/**
	 * @ignore
	 */
	function next() {
		$this->fetchCSVRow();
		return $this->currentRow;
	}

	/**
	 * @ignore
	 */
	function valid() {
		return $this->currentRow ? true : false;
	}
}
?>

View File

@ -27,7 +27,8 @@ class CsvBulkLoader extends BulkLoader {
public $enclosure = '"'; public $enclosure = '"';
/** /**
* Identifies if the loaded file has a header row. * Identifies if the
* has a header row.
* If a {@link self::$columnMap} is passed, we assume * If a {@link self::$columnMap} is passed, we assume
* the file has no headerrow, unless explicitly noted. * the file has no headerrow, unless explicitly noted.
* *
@ -36,67 +37,26 @@ class CsvBulkLoader extends BulkLoader {
public $hasHeaderRow = false; public $hasHeaderRow = false;
protected function processAll($filepath, $preview = false) { protected function processAll($filepath, $preview = false) {
ini_set('auto_detect_line_endings',1);
$file = fopen($filepath, 'r');
if(!$file) return false;
$results = new BulkLoader_Result(); $results = new BulkLoader_Result();
if($this->hasHeaderRow && $this->columnMap) { $csv = new CSVParser($filepath, $this->delimiter, $this->enclosure);
$columnRow = fgetcsv($file, 0, $this->delimiter, $this->enclosure);
$columnMap = array();
foreach($columnRow as $k => $origColumnName) {
$origColumnName = trim($origColumnName);
if(isset($this->columnMap[$origColumnName])) {
$columnMap[$origColumnName] = $this->columnMap[$origColumnName];
} else {
$columnMap[$origColumnName] = null;
}
} // ColumnMap has two uses, depending on whether hasHeaderRow is set
} elseif($this->columnMap) { if($this->columnMap) {
$columnMap = $this->columnMap; if($this->hasHeaderRow) $csv->mapColumns($this->columnMap);
} else { else $csv->provideHeaderRow($this->columnMap);
// assuming that first row is column naming if no columnmap is passed
$columnRow = fgetcsv($file, 0, $this->delimiter, $this->enclosure);
$columnMap = array_combine($columnRow, $columnRow);
} }
$rowIndex = 0; foreach($csv as $row) {
$rowIndex = 0; $this->processRecord($row, array(), $results, $preview);
while (($row = fgetcsv($file, 0, $this->delimiter, $this->enclosure)) !== FALSE) {
$rowIndex++;
/*
// the columnMap should have the same amount of columns as each record row
if(count(array_keys($columnMap)) == count(array_values($row))) {
user_error("CsvBulkLoader::processAll(): Columns in row {$rowIndex} don't match the \$columnMap", E_USER_WARNING);
}
*/
$indexedRow = array();
foreach($columnMap as $origColumnName => $fieldName) {
// in case the row has less fields than the columnmap,
// ignore the "leftover" mappings
if(!isset($row[count($indexedRow)])) {
user_error("CsvBulkLoader::processAll(): Columns in row {$rowIndex} don't match the \$columnMap", E_USER_NOTICE);
continue;
}
$indexedRow[$origColumnName] = $row[count($indexedRow)];
}
$this->processRecord($indexedRow, $columnMap, $results);
} }
fclose($file);
return $results; return $results;
} }
/** /**
* @todo Better messages for relation checks and duplicate detection * @todo Better messages for relation checks and duplicate detection
* Note that columnMap isn't used
*/ */
protected function processRecord($record, $columnMap, &$results, $preview = false) { protected function processRecord($record, $columnMap, &$results, $preview = false) {
$class = $this->objectClass; $class = $this->objectClass;
@ -108,9 +68,7 @@ class CsvBulkLoader extends BulkLoader {
// first run: find/create any relations and store them on the object // first run: find/create any relations and store them on the object
// we can't combine runs, as other columns might rely on the relation being present // we can't combine runs, as other columns might rely on the relation being present
$relations = array(); $relations = array();
foreach($record as $origColumnName => $val) { foreach($record as $fieldName => $val) {
$fieldName = $columnMap[$origColumnName];
// don't bother querying of value is not set // don't bother querying of value is not set
if($this->isNullValue($val)) continue; if($this->isNullValue($val)) continue;
@ -128,6 +86,7 @@ class CsvBulkLoader extends BulkLoader {
$obj->setComponent($relationName, $relationObj); $obj->setComponent($relationName, $relationObj);
$obj->{"{$relationName}ID"} = $relationObj->ID; $obj->{"{$relationName}ID"} = $relationObj->ID;
$obj->write(); $obj->write();
} elseif(strpos($fieldName, '.') !== false) { } elseif(strpos($fieldName, '.') !== false) {
// we have a relation column with dot notation // we have a relation column with dot notation
list($relationName,$columnName) = split('\.', $fieldName); list($relationName,$columnName) = split('\.', $fieldName);
@ -143,24 +102,13 @@ class CsvBulkLoader extends BulkLoader {
$id = ($preview) ? 0 : $obj->write(); $id = ($preview) ? 0 : $obj->write();
// second run: save data // second run: save data
foreach($record as $origColumnName => $val) { foreach($record as $fieldName => $val) {
$fieldName = $columnMap[$origColumnName];
if($this->isNullValue($val, $fieldName)) continue; if($this->isNullValue($val, $fieldName)) continue;
if($obj->hasMethod("import{$fieldName}")) { if($obj->hasMethod("import{$fieldName}")) {
$obj->{"import{$fieldName}"}($val, $record); $obj->{"import{$fieldName}"}($val, $record);
} elseif(strpos($fieldName, '.') !== false) {
// we have a relation column
list($relationName,$columnName) = split('\.', $fieldName);
$relationObj = $obj->getComponent($relationName);
$relationObj->{$columnName} = $val;
$relationObj->write();
$obj->flushCache(); // avoid relation caching confusion
//} elseif($obj->hasField($fieldName) || $obj->hasMethod($fieldName)) {
} else { } else {
// plain old value setter $obj->update(array($fieldName => $val));
$obj->{$fieldName} = $val;
} }
} }
@ -187,10 +135,9 @@ class CsvBulkLoader extends BulkLoader {
* columns specified via {@link self::$duplicateChecks} * columns specified via {@link self::$duplicateChecks}
* *
* @param array $record CSV data column * @param array $record CSV data column
* @param array $columnMap
* @return unknown * @return unknown
*/ */
public function findExistingObject($record, $columnMap) { public function findExistingObject($record) {
// checking for existing records (only if not already found) // checking for existing records (only if not already found)
foreach($this->duplicateChecks as $fieldName => $duplicateCheck) { foreach($this->duplicateChecks as $fieldName => $duplicateCheck) {
if(is_string($duplicateCheck)) { if(is_string($duplicateCheck)) {

View File

@ -0,0 +1,80 @@
<?php
/**
 * Tests for CSVParser: reading a file with its own header row, renaming columns
 * through a column map, and supplying a header row from the calling code.
 */
class CSVParserTest extends SapphireTest {
	/**
	 * Rows are keyed by the names found in the file's first line.
	 */
	function testParsingWithHeaders() {
		/* With no further setup, the first line of the file is used as the header row */
		$parser = new CSVParser('sapphire/tests/dev/CSVBulkLoaderTest_PlayersWithHeader.csv');

		$names = array();
		$bios = array();
		$dates = array();
		foreach($parser as $row) {
			/* Every row carries exactly the header names as its keys, in file order */
			$this->assertEquals(array('FirstName','Biography','Birthday'), array_keys($row));
			$names[] = $row['FirstName'];
			$bios[] = $row['Biography'];
			$dates[] = $row['Birthday'];
		}

		$expectedBios = array(
			"He's a good guy",
			"She is awesome.\nSo awesome that she gets multiple rows and \"escaped\" strings in her biography",
			"Pretty old, with an escaped comma",
			"Unicode FTW");

		$this->assertEquals(array('John','Jane','Jamie','Järg'), $names);
		$this->assertEquals($expectedBios, $bios);
		$this->assertEquals(array("31/01/1988","31/01/1982","31/01/1882","31/06/1982"), $dates);
	}

	/**
	 * A column map renames matching header names; matching ignores case.
	 */
	function testParsingWithHeadersAndColumnMap() {
		/* The file's own first line still supplies the header row */
		$parser = new CSVParser('sapphire/tests/dev/CSVBulkLoaderTest_PlayersWithHeader.csv');

		/* Rename two of the three columns; note the mixed-case key */
		$renames = array(
			'FirstName' => '__fn',
			'bIoGrApHy' => '__BG',
		);
		$parser->mapColumns($renames);

		$names = array();
		$bios = array();
		$dates = array();
		foreach($parser as $row) {
			/* Mapped columns use their new names; the unmapped one keeps its original name */
			$this->assertEquals(array('__fn','__BG','Birthday'), array_keys($row));
			$names[] = $row['__fn'];
			$bios[] = $row['__BG'];
			$dates[] = $row['Birthday'];
		}

		$expectedBios = array(
			"He's a good guy",
			"She is awesome.\nSo awesome that she gets multiple rows and \"escaped\" strings in her biography",
			"Pretty old, with an escaped comma",
			"Unicode FTW");

		$this->assertEquals(array('John','Jane','Jamie','Järg'), $names);
		$this->assertEquals($expectedBios, $bios);
		$this->assertEquals(array("31/01/1988","31/01/1982","31/01/1882","31/06/1982"), $dates);
	}

	/**
	 * A caller-supplied header row keys the rows, and the file's first line becomes data.
	 */
	function testParsingWithExplicitHeaderRow() {
		/* Supply the column names ourselves instead of reading them from the file */
		$parser = new CSVParser('sapphire/tests/dev/CSVBulkLoaderTest_PlayersWithHeader.csv');
		$suppliedHeader = array('__fn','__bio','__bd');
		$parser->provideHeaderRow($suppliedHeader);

		$names = array();
		$bios = array();
		$dates = array();
		foreach($parser as $row) {
			/* Every row is keyed with the names that were supplied */
			$this->assertEquals(array('__fn','__bio','__bd'), array_keys($row));
			$names[] = $row['__fn'];
			$bios[] = $row['__bio'];
			$dates[] = $row['__bd'];
		}

		/* The file's first line shows up as the first data row */
		$expectedBios = array(
			'Biography',
			"He's a good guy",
			"She is awesome.\nSo awesome that she gets multiple rows and \"escaped\" strings in her biography",
			"Pretty old, with an escaped comma",
			"Unicode FTW");

		$this->assertEquals(array('FirstName','John','Jane','Jamie','Järg'), $names);
		$this->assertEquals($expectedBios, $bios);
		$this->assertEquals(array("Birthday","31/01/1988","31/01/1982","31/01/1882","31/06/1982"), $dates);
	}
}

View File

@ -5,14 +5,14 @@
* @todo Test with columnn headers and custom mappings * @todo Test with columnn headers and custom mappings
*/ */
class CsvBulkLoaderTest extends SapphireTest { class CsvBulkLoaderTest extends SapphireTest {
static $fixture_file = 'sapphire/tests/CsvBulkLoaderTest.yml'; static $fixture_file = 'sapphire/tests/dev/CsvBulkLoaderTest.yml';
/** /**
* Test plain import with column auto-detection * Test plain import with column auto-detection
*/ */
function testLoad() { function testLoad() {
$loader = new CsvBulkLoader('CsvBulkLoaderTest_Player'); $loader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
$filepath = Director::baseFolder() . '/sapphire/tests/CsvBulkLoaderTest_PlayersWithHeader.csv'; $filepath = Director::baseFolder() . '/sapphire/tests/dev/CsvBulkLoaderTest_PlayersWithHeader.csv';
$file = fopen($filepath, 'r'); $file = fopen($filepath, 'r');
$compareCount = $this->getLineCount($file); $compareCount = $this->getLineCount($file);
fgetcsv($file); // pop header row fgetcsv($file); // pop header row
@ -20,7 +20,7 @@ class CsvBulkLoaderTest extends SapphireTest {
$results = $loader->load($filepath); $results = $loader->load($filepath);
// Test that right amount of columns was imported // Test that right amount of columns was imported
$this->assertEquals($results->Count(), $compareCount-1, 'Test correct count of imported data'); $this->assertEquals(4, $results->Count(), 'Test correct count of imported data');
// Test that columns were correctly imported // Test that columns were correctly imported
$obj = Dataobject::get_one("CsvBulkLoaderTest_Player", "FirstName = 'John'"); $obj = Dataobject::get_one("CsvBulkLoaderTest_Player", "FirstName = 'John'");
@ -36,7 +36,7 @@ class CsvBulkLoaderTest extends SapphireTest {
*/ */
function testLoadWithColumnMap() { function testLoadWithColumnMap() {
$loader = new CsvBulkLoader('CsvBulkLoaderTest_Player'); $loader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
$filepath = Director::baseFolder() . '/sapphire/tests/CsvBulkLoaderTest_Players.csv'; $filepath = Director::baseFolder() . '/sapphire/tests/dev/CsvBulkLoaderTest_Players.csv';
$file = fopen($filepath, 'r'); $file = fopen($filepath, 'r');
$compareCount = $this->getLineCount($file); $compareCount = $this->getLineCount($file);
$compareRow = fgetcsv($file); $compareRow = fgetcsv($file);
@ -49,7 +49,7 @@ class CsvBulkLoaderTest extends SapphireTest {
$results = $loader->load($filepath); $results = $loader->load($filepath);
// Test that right amount of columns was imported // Test that right amount of columns was imported
$this->assertEquals($results->Count(), $compareCount, 'Test correct count of imported data'); $this->assertEquals(4, $results->Count(), 'Test correct count of imported data');
// Test that columns were correctly imported // Test that columns were correctly imported
$obj = Dataobject::get_one("CsvBulkLoaderTest_Player", "FirstName = 'John'"); $obj = Dataobject::get_one("CsvBulkLoaderTest_Player", "FirstName = 'John'");
@ -65,7 +65,7 @@ class CsvBulkLoaderTest extends SapphireTest {
*/ */
function testLoadWithCustomHeaderAndRelation() { function testLoadWithCustomHeaderAndRelation() {
$loader = new CsvBulkLoader('CsvBulkLoaderTest_Player'); $loader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
$filepath = Director::baseFolder() . '/sapphire/tests/CsvBulkLoaderTest_PlayersWithCustomHeaderAndRelation.csv'; $filepath = Director::baseFolder() . '/sapphire/tests/dev/CsvBulkLoaderTest_PlayersWithCustomHeaderAndRelation.csv';
$file = fopen($filepath, 'r'); $file = fopen($filepath, 'r');
$compareCount = $this->getLineCount($file); $compareCount = $this->getLineCount($file);
fgetcsv($file); // pop header row fgetcsv($file); // pop header row
@ -89,7 +89,7 @@ class CsvBulkLoaderTest extends SapphireTest {
$results = $loader->load($filepath); $results = $loader->load($filepath);
// Test that right amount of columns was imported // Test that right amount of columns was imported
$this->assertEquals($results->Count(), $compareCount-1, 'Test correct count of imported data'); $this->assertEquals(1, $results->Count(), 'Test correct count of imported data');
// Test of augumenting existing relation (created by fixture) // Test of augumenting existing relation (created by fixture)
$testTeam = DataObject::get_one('CsvBulkLoaderTest_Team', null, null, 'Created DESC'); $testTeam = DataObject::get_one('CsvBulkLoaderTest_Team', null, null, 'Created DESC');
@ -115,7 +115,7 @@ class CsvBulkLoaderTest extends SapphireTest {
function testLoadWithIdentifiers() { function testLoadWithIdentifiers() {
// first load // first load
$loader = new CsvBulkLoader('CsvBulkLoaderTest_Player'); $loader = new CsvBulkLoader('CsvBulkLoaderTest_Player');
$filepath = Director::baseFolder() . '/sapphire/tests/CsvBulkLoaderTest_PlayersWithId.csv'; $filepath = Director::baseFolder() . '/sapphire/tests/dev/CsvBulkLoaderTest_PlayersWithId.csv';
$loader->duplicateChecks = array( $loader->duplicateChecks = array(
'ExternalIdentifier' => 'ExternalIdentifier' 'ExternalIdentifier' => 'ExternalIdentifier'
); );
@ -126,7 +126,7 @@ class CsvBulkLoaderTest extends SapphireTest {
$this->assertEquals($player->Biography, 'He\'s a good guy', 'test updating of duplicate imports within the same import works'); $this->assertEquals($player->Biography, 'He\'s a good guy', 'test updating of duplicate imports within the same import works');
// load with updated data // load with updated data
$filepath = Director::baseFolder() . '/sapphire/tests/CsvBulkLoaderTest_PlayersWithIdUpdated.csv'; $filepath = Director::baseFolder() . '/sapphire/tests/dev/CsvBulkLoaderTest_PlayersWithIdUpdated.csv';
$results = $loader->load($filepath); $results = $loader->load($filepath);
$player = DataObject::get_by_id('CsvBulkLoaderTest_Player', 1); $player = DataObject::get_by_id('CsvBulkLoaderTest_Player', 1);

View File

Can't render this file because it contains an unexpected character in line 2 and column 70.

View File

@ -1,5 +1,6 @@
"FirstName","Biography","Birthday" "FirstName","Biography","Birthday"
"John","He's a good guy","31/01/1988" "John","He's a good guy","31/01/1988"
"Jane","She is awesome.\nSo awesome that she gets multiple rows and \"escaped\" strings in her biography","31/01/1982" "Jane","She is awesome.
So awesome that she gets multiple rows and \"escaped\" strings in her biography","31/01/1982"
"Jamie","Pretty old\, with an escaped comma","31/01/1882" "Jamie","Pretty old\, with an escaped comma","31/01/1882"
"Järg","Unicode FTW","31/06/1982" "Järg","Unicode FTW","31/06/1982"
Can't render this file because it contains an unexpected character in line 3 and column 70.