2008-10-03 00:27:58 +00:00
|
|
|
<?php
|
|
|
|
/**
 * Class to handle parsing of CSV files, where the column headers are in the
 * first row.
 *
 * The parser implements Iterator: loop over it with foreach and every
 * iteration yields one data row as a map of column name => column value.
 * Source column names can be translated with {@link mapColumns()}, and a
 * header row can be supplied with {@link provideHeaderRow()} for files that
 * do not contain one.
 *
 * Usage:
 *
 * <code>
 * $parser = new CSVParser('myfile.csv');
 * $parser->mapColumns(array(
 *    'first name' => 'FirstName',
 *    'lastname' => 'Surname',
 *    'last name' => 'Surname'
 * ));
 * foreach($parser as $row) {
 *    // $row is a map of column name => column value
 *    $obj = new MyDataObject();
 *    $obj->update($row);
 *    $obj->write();
 * }
 * </code>
 *
 * @package framework
 * @subpackage bulkloading
 */
class CSVParser extends Object implements Iterator {

    /**
     * Path of the CSV file, as returned by Director::getAbsFile().
     *
     * @var string
     */
    protected $filename;

    /**
     * Handle of the CSV file while it is open, null otherwise.
     *
     * @var resource|null
     */
    protected $fileHandle;

    /**
     * Map of source columns to output columns.
     *
     * Once they get into this variable, all of the source columns are in
     * lowercase.
     *
     * @var array
     */
    protected $columnMap = array();

    /**
     * The header row used to map data in the CSV file.
     *
     * To begin with, this is null. Once it has been set, data will get
     * returned from the CSV file.
     *
     * @var array|null
     */
    protected $headerRow = null;

    /**
     * A custom header row provided by the caller.
     *
     * @var array|null
     */
    protected $providedHeaderRow = null;

    /**
     * The data of the current row, or null when there is no current row.
     *
     * @var array|null
     */
    protected $currentRow = null;

    /**
     * The current row number.
     *
     * 1 is the first data row in the CSV file; the header row, if it exists,
     * is ignored.
     *
     * @var int
     */
    protected $rowNum = 0;

    /**
     * The character for separating columns.
     *
     * @var string
     */
    protected $delimiter = ",";

    /**
     * The character for quoting columns.
     *
     * @var string
     */
    protected $enclosure = '"';

    /**
     * Open a CSV file for parsing.
     *
     * You can use the object returned in a foreach loop to extract the data.
     * The file itself is only opened once the first row is requested.
     *
     * @param string $filename The name of the file. If relative, it will be relative to the site's base dir
     * @param string $delimiter The character for separating columns
     * @param string $enclosure The character for quoting or enclosing columns
     */
    public function __construct($filename, $delimiter = ",", $enclosure = '"') {
        $this->filename = Director::getAbsFile($filename);
        $this->delimiter = $delimiter;
        $this->enclosure = $enclosure;

        parent::__construct();
    }

    /**
     * Re-map columns in the CSV file.
     *
     * This can be useful for identifying synonyms in the file. For example:
     *
     * <code>
     * $csv->mapColumns(array(
     *    'firstname' => 'FirstName',
     *    'last name' => 'Surname',
     * ));
     * </code>
     *
     * Source column names are matched case-insensitively. Calling this
     * method more than once accumulates mappings; a later mapping for the
     * same source column replaces the earlier one.
     *
     * @param array $columnMap Map of source column name => output column name
     */
    public function mapColumns($columnMap) {
        if(!$columnMap) {
            return;
        }

        // Assign key by key instead of using array_merge(): array_merge()
        // renumbers integer-like keys, which would lose the mapping of a
        // source column whose name is numeric (e.g. '2024').
        foreach($columnMap as $source => $target) {
            $this->columnMap[strtolower((string) $source)] = $target;
        }
    }

    /**
     * If your CSV file doesn't have a header row, then you can call this
     * function to provide one.
     *
     * If you call this function, then the first row of the CSV will be
     * included in the data returned.
     *
     * @param array $headerRow List of column names, in file column order
     */
    public function provideHeaderRow($headerRow) {
        $this->providedHeaderRow = $headerRow;
    }

    /**
     * Open the CSV file for reading.
     *
     * When the file cannot be opened, $this->fileHandle is left as null so
     * that callers can detect the failure.
     */
    protected function openFile() {
        ini_set('auto_detect_line_endings',1);

        $handle = fopen($this->filename,'r');
        $this->fileHandle = $handle ? $handle : null;

        if($this->providedHeaderRow) {
            $this->headerRow = $this->remapHeader($this->providedHeaderRow);
        }
    }

    /**
     * Close the CSV file and re-set all of the internal variables.
     */
    protected function closeFile() {
        if($this->fileHandle) {
            fclose($this->fileHandle);
        }

        $this->fileHandle = null;
        $this->rowNum = 0;
        $this->currentRow = null;
        $this->headerRow = null;
    }

    /**
     * Read the next raw line from the open CSV file.
     *
     * @return array|false|null The result of fgetcsv() for the next line
     */
    private function readLine() {
        return fgetcsv(
            $this->fileHandle,
            0,
            $this->delimiter,
            $this->enclosure
        );
    }

    /**
     * Get a header row from the CSV file.
     *
     * If no line can be read (for example because the file is empty), the
     * header row becomes an empty array.
     */
    protected function fetchCSVHeader() {
        $srcRow = $this->readLine();

        // Only hand real rows to remapHeader(); it iterates its argument.
        $this->headerRow = is_array($srcRow)
            ? $this->remapHeader($srcRow)
            : array();
    }

    /**
     * Map the contents of a header array using $this->columnMap.
     *
     * Header items are looked up case-insensitively; items without an entry
     * in the column map are passed through unchanged.
     *
     * @param array $header List of source column names
     *
     * @return array List of output column names, in the same order
     */
    protected function remapHeader($header) {
        $mappedHeader = array();

        foreach($header as $item) {
            // Cast before lowercasing: a header cell can be null
            // (fgetcsv() reports a blank line as array(null)).
            $lookup = strtolower((string) $item);

            $mappedHeader[] = isset($this->columnMap[$lookup])
                ? $this->columnMap[$lookup]
                : $item;
        }

        return $mappedHeader;
    }

    /**
     * Turn a raw CSV line into a map of column name => column value.
     *
     * Columns whose header is empty are dropped; columns beyond the end of
     * the header row raise an E_USER_WARNING and are dropped as well.
     *
     * @param array $srcRow Raw cells as returned by fgetcsv()
     *
     * @return array
     */
    private function buildRow($srcRow) {
        $escaped = array('\\'.$this->enclosure, '\\'.$this->delimiter);
        $plain = array($this->enclosure, $this->delimiter);
        $row = array();

        foreach($srcRow as $i => $value) {
            // Allow escaping of quotes and commas in the data
            $value = str_replace($escaped, $plain, (string) $value);

            if(!array_key_exists($i, $this->headerRow)) {
                user_error("No heading for column $i on row $this->rowNum", E_USER_WARNING);
                continue;
            }

            if($this->headerRow[$i]) {
                $row[$this->headerRow[$i]] = $value;
            }
        }

        return $row;
    }

    /**
     * Get a row from the CSV file and update $this->currentRow;
     *
     * Opens the file and reads the header row first if that has not
     * happened yet. When no further row can be read, the file is closed and
     * the internal state is re-set.
     *
     * @return array|null The new current row, or null if there is none
     */
    protected function fetchCSVRow() {
        if(!$this->fileHandle) {
            $this->openFile();

            if(!$this->fileHandle) {
                // The file could not be opened. Re-set the state so that
                // valid() reports false rather than passing an invalid
                // handle on to fgetcsv().
                $this->closeFile();
                return $this->currentRow;
            }
        }

        if(!$this->headerRow) {
            $this->fetchCSVHeader();
        }

        $this->rowNum++;

        $srcRow = $this->readLine();

        if(is_array($srcRow)) {
            $this->currentRow = $this->buildRow($srcRow);
        } else {
            $this->closeFile();
        }

        return $this->currentRow;
    }

    /**
     * Close the file when the parser is destroyed.
     */
    public function __destruct() {
        $this->closeFile();
    }

    //// ITERATOR FUNCTIONS

    /**
     * Iterator: start again from the first data row of the file.
     */
    public function rewind() {
        $this->closeFile();
        $this->fetchCSVRow();
    }

    /**
     * Iterator: the current row as a map of column name => column value.
     *
     * @return array|null
     */
    public function current() {
        return $this->currentRow;
    }

    /**
     * Iterator: the number of the current row (1 for the first data row).
     *
     * @return int
     */
    public function key() {
        return $this->rowNum;
    }

    /**
     * Iterator: advance to the next row.
     *
     * @return array|null The new current row, or null if there is none
     */
    public function next() {
        $this->fetchCSVRow();

        return $this->currentRow;
    }

    /**
     * Iterator: whether there is a current row.
     *
     * Compares against null instead of testing truthiness, so a row in
     * which every column was dropped (an empty array) does not end the
     * iteration early.
     *
     * @return bool
     */
    public function valid() {
        return $this->currentRow !== null;
    }
}
|