2008-08-09 06:53:34 +02:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* A base for bulk loaders of content into the SilverStripe database.
|
2012-03-24 04:38:57 +01:00
|
|
|
* Bulk loaders give SilverStripe authors the ability to do large-scale uploads into their SilverStripe databases.
|
2008-08-09 06:53:34 +02:00
|
|
|
*
|
|
|
|
* You can configure column-handling,
|
|
|
|
*
|
|
|
|
* @todo Add support for adding/editing has_many relations.
|
|
|
|
* @todo Add support for deep chaining of relation properties (e.g. Player.Team.Stats.GoalCount)
|
2008-08-09 07:45:43 +02:00
|
|
|
* @todo Character conversion
|
2008-08-09 06:53:34 +02:00
|
|
|
*
|
|
|
|
* @see http://rfc.net/rfc4180.html
|
2011-03-23 04:13:51 +01:00
|
|
|
* @package sapphire
|
2008-08-09 06:53:34 +02:00
|
|
|
* @subpackage bulkloading
|
|
|
|
* @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
|
|
|
|
*/
|
|
|
|
abstract class BulkLoader extends ViewableData {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Each row in the imported dataset should map to one instance
|
|
|
|
* of this class (with optional property translation
|
|
|
|
* through {@link self::$columnMap}).
|
|
|
|
*
|
|
|
|
* @var string
|
|
|
|
*/
|
|
|
|
public $objectClass;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Override this on subclasses to give the specific functions names.
|
|
|
|
*
|
|
|
|
* @var string
|
|
|
|
*/
|
|
|
|
public static $title;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Map columns to DataObject-properties.
|
|
|
|
* If not specified, we assume the first row
|
|
|
|
* in the file contains the column headers.
|
|
|
|
* The order of your array should match the column order.
|
|
|
|
*
|
|
|
|
* The column count should match the count of array elements,
|
|
|
|
* fill with NULL values if you want to skip certain columns.
|
2008-08-09 07:45:43 +02:00
|
|
|
*
|
|
|
|
* You can also combine {@link $hasHeaderRow} = true and {@link $columnMap}
|
|
|
|
* and omit the NULL values in your map.
|
2008-08-09 06:53:34 +02:00
|
|
|
*
|
|
|
|
* Supports one-level chaining of has_one relations and properties with dot notation
|
|
|
|
* (e.g. Team.Title). The first part has to match a has_one relation name
|
|
|
|
* (not necessarily the classname of the used relation).
|
|
|
|
*
|
|
|
|
* <code>
|
|
|
|
* <?php
|
|
|
|
* // simple example
|
|
|
|
* array(
|
|
|
|
* 'Title',
|
|
|
|
* 'Birthday'
|
|
|
|
* )
|
|
|
|
*
|
|
|
|
* // complex example
|
|
|
|
* array(
|
|
|
|
* 'first name' => 'FirstName', // custom column name
|
|
|
|
* null, // ignored column
|
|
|
|
* 'RegionID', // direct has_one/has_many ID setting
|
|
|
|
* 'OrganisationTitle', // create has_one relation to existing record using $relationCallbacks
|
|
|
|
* 'street' => 'Organisation.StreetName', // match an existing has_one or create one and write property.
|
|
|
|
* );
|
|
|
|
* ?>
|
|
|
|
* </code>
|
|
|
|
*
|
|
|
|
* @var array
|
|
|
|
*/
|
2008-08-09 07:00:42 +02:00
|
|
|
public $columnMap = array();
|
2008-08-09 06:53:34 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Find a has_one relation based on a specific column value.
|
|
|
|
*
|
|
|
|
* <code>
|
|
|
|
* <?php
|
|
|
|
* array(
|
|
|
|
* 'OrganisationTitle' => array(
|
|
|
|
* 'relationname' => 'Organisation', // relation accessor name
|
|
|
|
* 'callback' => 'getOrganisationByTitle',
|
|
|
|
* ),
|
|
|
|
* );
|
|
|
|
* ?>
|
|
|
|
* </code>
|
|
|
|
*
|
|
|
|
* @var array
|
|
|
|
*/
|
2008-08-09 07:00:42 +02:00
|
|
|
public $relationCallbacks = array();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Specifies how to determine duplicates based on one or more provided fields
|
|
|
|
* in the imported data, matching to properties on the used {@link DataObject} class.
|
|
|
|
* Alternatively the array values can contain a callback method (see example for
|
2009-02-02 00:49:53 +01:00
|
|
|
* implementation details). The callback method should be defined on the source class.
|
|
|
|
*
|
|
|
|
* NOTE: If you're trying to get a unique Member record by a particular field that
|
|
|
|
* isn't Email, you need to ensure that Member is correctly set to the unique field
|
|
|
|
* you want, as it will merge any duplicates during {@link Member::onBeforeWrite()}.
|
|
|
|
*
|
|
|
|
* {@see Member::set_unique_identifier_field()}.
|
|
|
|
*
|
2008-08-09 07:00:42 +02:00
|
|
|
* If multiple checks are specified, the first one "wins".
|
|
|
|
*
|
|
|
|
* <code>
|
|
|
|
* <?php
|
|
|
|
* array(
|
|
|
|
* 'customernumber' => 'ID',
|
|
|
|
* 'phonenumber' => array(
|
|
|
|
* 'callback' => 'getByImportedPhoneNumber'
|
|
|
|
* )
|
|
|
|
* );
|
|
|
|
* ?>
|
|
|
|
* </code>
|
|
|
|
*
|
|
|
|
* @var array
|
|
|
|
*/
|
|
|
|
public $duplicateChecks = array();
|
2008-08-09 06:53:34 +02:00
|
|
|
|
2009-12-02 10:40:28 +01:00
|
|
|
/**
|
|
|
|
* @var boolean $deleteExistingRecords Delete ALL records before importing.
|
|
|
|
*/
|
|
|
|
public $deleteExistingRecords = false;
|
|
|
|
|
2008-08-09 06:53:34 +02:00
|
|
|
function __construct($objectClass) {
|
|
|
|
$this->objectClass = $objectClass;
|
2008-08-28 07:00:51 +02:00
|
|
|
parent::__construct();
|
2008-08-09 06:53:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2009-12-02 10:40:28 +01:00
|
|
|
* Load the given file via {@link self::processAll()} and {@link self::processRecord()}.
|
|
|
|
* Optionally truncates (clear) the table before it imports.
|
2008-08-09 06:53:34 +02:00
|
|
|
*
|
2008-09-15 16:21:43 +02:00
|
|
|
* @return BulkLoader_Result See {@link self::processAll()}
|
2008-08-09 06:53:34 +02:00
|
|
|
*/
|
2009-12-02 10:40:28 +01:00
|
|
|
public function load($filepath) {
|
2008-08-28 07:00:51 +02:00
|
|
|
ini_set('max_execution_time', 3600);
|
2009-12-02 10:40:28 +01:00
|
|
|
increase_memory_limit_to('512M');
|
2009-06-17 13:36:49 +02:00
|
|
|
|
2009-12-02 10:40:28 +01:00
|
|
|
//get all instances of the to be imported data object
|
|
|
|
if($this->deleteExistingRecords) {
|
2009-11-22 06:16:38 +01:00
|
|
|
DataObject::get($this->objectClass)->removeAll();
|
2009-09-04 02:31:08 +02:00
|
|
|
}
|
|
|
|
|
2008-08-09 06:53:34 +02:00
|
|
|
return $this->processAll($filepath);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Preview a file import (don't write anything to the database).
|
|
|
|
* Useful to analyze the input and give the users a chance to influence
|
|
|
|
* it through a UI.
|
|
|
|
*
|
2008-08-09 07:45:43 +02:00
|
|
|
* @todo Implement preview()
|
|
|
|
*
|
2008-08-09 06:53:34 +02:00
|
|
|
* @param string $filepath Absolute path to the file we're importing
|
|
|
|
* @return BulkLoader_Result See {@link self::processAll()}
|
|
|
|
*/
|
|
|
|
public function preview($filepath) {
|
2008-08-09 07:45:43 +02:00
|
|
|
user_error("BulkLoader::preview(): Not implemented", E_USER_ERROR);
|
2008-08-09 06:53:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Process every record in the file
|
|
|
|
*
|
|
|
|
* @param string $filepath Absolute path to the file we're importing (with UTF8 content)
|
|
|
|
* @param boolean $preview If true, we'll just output a summary of changes but not actually do anything
|
2008-09-15 16:21:43 +02:00
|
|
|
* @return BulkLoader_Result A collection of objects which are either created, updated or deleted.
|
|
|
|
* 'message': free-text string that can optionally provide some more information about what changes have been made.
|
2008-08-09 06:53:34 +02:00
|
|
|
*/
|
|
|
|
abstract protected function processAll($filepath, $preview = false);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Process a single record from the file.
|
|
|
|
*
|
|
|
|
* @param array $record A map of the data, keyed by the header field defined in {@link self::$columnMap}
|
2008-08-09 07:45:43 +02:00
|
|
|
* @param array $columnMap
|
2008-09-15 16:21:43 +02:00
|
|
|
* @param $result BulkLoader_Result (passed as reference)
|
2008-08-09 06:53:34 +02:00
|
|
|
* @param boolean $preview
|
|
|
|
*/
|
2008-09-15 16:21:43 +02:00
|
|
|
abstract protected function processRecord($record, $columnMap, &$result, $preview = false);
|
2008-08-09 06:53:34 +02:00
|
|
|
|
|
|
|
/**
|
2011-10-28 03:37:27 +02:00
|
|
|
* Return a FieldList containing all the options for this form; this
|
2008-08-09 06:53:34 +02:00
|
|
|
* doesn't include the actual upload field itself
|
|
|
|
*/
|
|
|
|
public function getOptionFields() {}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return a human-readable name for this object.
|
|
|
|
* It defaults to the class name and can be overridden by setting the static variable $title.
|
|
|
|
*
|
|
|
|
* @return string
|
|
|
|
*/
|
|
|
|
public function Title() {
|
|
|
|
return ($title = $this->stat('title')) ? $title : $this->class;
|
|
|
|
}
|
|
|
|
|
2008-08-09 07:45:43 +02:00
|
|
|
/**
|
|
|
|
* Get a specification of all available columns and relations on the used model.
|
|
|
|
* Useful for generation of spec documents for technical end users.
|
|
|
|
*
|
|
|
|
* Return Format:
|
2009-03-22 23:59:14 +01:00
|
|
|
* <code>
|
2008-08-09 07:45:43 +02:00
|
|
|
* array(
|
|
|
|
* 'fields' => array('myFieldName'=>'myDescription'),
|
|
|
|
* 'relations' => array('myRelationName'=>'myDescription'),
|
|
|
|
* )
|
2009-03-22 23:59:14 +01:00
|
|
|
* </code>
|
2008-08-09 07:45:43 +02:00
|
|
|
*
|
|
|
|
* @todo Mix in custom column mappings
|
|
|
|
*
|
|
|
|
* @return array
|
|
|
|
**/
|
|
|
|
public function getImportSpec() {
|
|
|
|
$spec = array();
|
|
|
|
|
|
|
|
// get database columns (fieldlabels include fieldname as a key)
|
2009-04-29 02:07:39 +02:00
|
|
|
// using the $includerelations flag as false, so that it only contains $db fields
|
2010-02-11 03:18:54 +01:00
|
|
|
$spec['fields'] = (array)singleton($this->objectClass)->fieldLabels(false);
|
2008-08-09 07:45:43 +02:00
|
|
|
|
|
|
|
$has_ones = singleton($this->objectClass)->has_one();
|
|
|
|
$has_manys = singleton($this->objectClass)->has_many();
|
|
|
|
$many_manys = singleton($this->objectClass)->many_many();
|
|
|
|
|
|
|
|
$spec['relations'] = (array)$has_ones + (array)$has_manys + (array)$many_manys;
|
|
|
|
|
|
|
|
return $spec;
|
|
|
|
}
|
|
|
|
|
2008-08-09 07:00:42 +02:00
|
|
|
/**
|
|
|
|
* Determines if a specific field is null.
|
|
|
|
* Can be useful for unusual "empty" flags in the file,
|
|
|
|
* e.g. a "(not set)" value.
|
|
|
|
* The usual {@link DBField::isNull()} checks apply when writing the {@link DataObject},
|
|
|
|
* so this is mainly a customization method.
|
|
|
|
*
|
|
|
|
* @param mixed $val
|
|
|
|
* @param string $fieldName Name of the field as specified in the array-values for {@link self::$columnMap}.
|
|
|
|
* @return boolean
|
|
|
|
*/
|
|
|
|
protected function isNullValue($val, $fieldName = null) {
|
2009-02-02 00:49:53 +01:00
|
|
|
return (empty($val) && $val !== '0');
|
2008-08-09 07:00:42 +02:00
|
|
|
}
|
|
|
|
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Encapsulates the result of a {@link BulkLoader} import
|
|
|
|
* (usually through the {@link BulkLoader::processAll()} method).
|
|
|
|
*
|
|
|
|
* @todo Refactor to support lazy-loaded DataObjectSets once they are implemented.
|
|
|
|
*
|
2011-03-23 04:13:51 +01:00
|
|
|
* @package sapphire
|
2008-09-15 16:21:43 +02:00
|
|
|
* @subpackage bulkloading
|
|
|
|
* @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
|
|
|
|
*/
|
|
|
|
class BulkLoader_Result extends Object {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var array Stores a map of ID and ClassNames
|
|
|
|
* which can be reconstructed to DataObjects.
|
|
|
|
* As imports can get large we just store enough
|
|
|
|
* information to reconstruct the objects on demand.
|
|
|
|
* Optionally includes a status message specific to
|
|
|
|
* the import of this object. This information is stored
|
|
|
|
* in a custom object property "_BulkLoaderMessage".
|
|
|
|
*
|
2010-04-23 02:11:41 +02:00
|
|
|
* Example:
|
|
|
|
* <code>
|
|
|
|
* array(array('ID'=>1, 'ClassName'=>'Member', 'Message'=>'Updated existing record based on ParentID relation'))
|
|
|
|
* </code>
|
2008-09-15 16:21:43 +02:00
|
|
|
*/
|
|
|
|
protected $created = array();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var array (see {@link $created})
|
|
|
|
*/
|
|
|
|
protected $updated = array();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var array (see {@link $created})
|
|
|
|
*/
|
|
|
|
protected $deleted = array();
|
2008-10-28 04:24:06 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Stores the last change.
|
|
|
|
* It is in the same format as {@link $created} but with an additional key, "ChangeType", which will be set to
|
|
|
|
* one of 3 strings: "created", "updated", or "deleted"
|
|
|
|
*/
|
|
|
|
protected $lastChange = array();
|
2008-09-15 16:21:43 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the count of all objects which were
|
|
|
|
* created or updated.
|
|
|
|
*
|
|
|
|
* @return int
|
|
|
|
*/
|
|
|
|
public function Count() {
|
|
|
|
return count($this->created) + count($this->updated);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return int
|
|
|
|
*/
|
|
|
|
public function CreatedCount() {
|
|
|
|
return count($this->created);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return int
|
|
|
|
*/
|
|
|
|
public function UpdatedCount() {
|
|
|
|
return count($this->updated);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return int
|
|
|
|
*/
|
|
|
|
public function DeletedCount() {
|
|
|
|
return count($this->deleted);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns all created objects. Each object might
|
|
|
|
* contain specific importer feedback in the "_BulkLoaderMessage" property.
|
|
|
|
*
|
2011-10-26 08:09:04 +02:00
|
|
|
* @return ArrayList
|
2008-09-15 16:21:43 +02:00
|
|
|
*/
|
|
|
|
public function Created() {
|
2011-10-26 08:09:04 +02:00
|
|
|
return $this->mapToArrayList($this->created);
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-10-26 08:09:04 +02:00
|
|
|
* @return ArrayList
|
2008-09-15 16:21:43 +02:00
|
|
|
*/
|
|
|
|
public function Updated() {
|
2011-10-26 08:09:04 +02:00
|
|
|
return $this->mapToArrayList($this->updated);
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-10-26 08:09:04 +02:00
|
|
|
* @return ArrayList
|
2008-09-15 16:21:43 +02:00
|
|
|
*/
|
|
|
|
public function Deleted() {
|
2011-10-26 08:09:04 +02:00
|
|
|
return $this->mapToArrayList($this->deleted);
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
2008-10-28 04:24:06 +01:00
|
|
|
/**
|
|
|
|
* Returns the last change.
|
|
|
|
* It is in the same format as {@link $created} but with an additional key, "ChangeType", which will be set to
|
|
|
|
* one of 3 strings: "created", "updated", or "deleted"
|
|
|
|
*/
|
|
|
|
public function LastChange() {
|
|
|
|
return $this->lastChange;
|
|
|
|
}
|
|
|
|
|
2008-09-15 16:21:43 +02:00
|
|
|
/**
|
|
|
|
* @param $obj DataObject
|
|
|
|
* @param $message string
|
|
|
|
*/
|
|
|
|
public function addCreated($obj, $message = null) {
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->created[] = $this->lastChange = array(
|
2008-09-15 16:21:43 +02:00
|
|
|
'ID' => $obj->ID,
|
|
|
|
'ClassName' => $obj->class,
|
|
|
|
'Message' => $message
|
|
|
|
);
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->lastChange['ChangeType'] = 'created';
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param $obj DataObject
|
|
|
|
* @param $message string
|
|
|
|
*/
|
|
|
|
public function addUpdated($obj, $message = null) {
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->updated[] = $this->lastChange = array(
|
2008-09-15 16:21:43 +02:00
|
|
|
'ID' => $obj->ID,
|
|
|
|
'ClassName' => $obj->class,
|
|
|
|
'Message' => $message
|
|
|
|
);
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->lastChange['ChangeType'] = 'updated';
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param $obj DataObject
|
|
|
|
* @param $message string
|
|
|
|
*/
|
|
|
|
public function addDeleted($obj, $message = null) {
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->deleted[] = $this->lastChange = array(
|
2008-09-15 16:21:43 +02:00
|
|
|
'ID' => $obj->ID,
|
|
|
|
'ClassName' => $obj->class,
|
|
|
|
'Message' => $message
|
|
|
|
);
|
2008-10-28 04:24:06 +01:00
|
|
|
$this->lastChange['ChangeType'] = 'deleted';
|
2008-09-15 16:21:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Rebuild the DataObjects referenced by a list of change entries.
 *
 * Each entry's 'ClassName' and 'ID' are passed to DataObject::get_by_id().
 * Entries whose lookup yields nothing are skipped instead of being
 * dereferenced. The entry's 'Message' is attached to each resolved object
 * as the custom property "_BulkLoaderMessage".
 *
 * @param $arr Array containing ID and ClassName maps
 * @return ArrayList
 */
protected function mapToArrayList($arr) {
	$set = new ArrayList();

	foreach($arr as $arrItem) {
		$obj = DataObject::get_by_id($arrItem['ClassName'], $arrItem['ID']);

		// Guard before touching the object: writing a property on a failed
		// lookup result is an error, so unresolved entries are skipped.
		if(!$obj) continue;

		$obj->_BulkLoaderMessage = $arrItem['Message'];
		$set->push($obj);
	}

	return $set;
}
|
|
|
|
|
2008-08-09 06:53:34 +02:00
|
|
|
}
|