silverstripe-framework/dev/BulkLoader.php

425 lines
12 KiB
PHP
Raw Permalink Normal View History

<?php
/**
 * A base for bulk loaders of content into the SilverStripe database.
 * Bulk loaders give SilverStripe authors the ability to do large-scale uploads into their SilverStripe databases.
 *
 * You can configure column-handling through {@link self::$columnMap},
 * {@link self::$relationCallbacks} and {@link self::$duplicateChecks}.
 *
 * @todo Add support for adding/editing has_many relations.
 * @todo Add support for deep chaining of relation properties (e.g. Player.Team.Stats.GoalCount)
 * @todo Character conversion
 *
 * @see http://tools.ietf.org/html/rfc4180
 * @package framework
 * @subpackage bulkloading
 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
 */
abstract class BulkLoader extends ViewableData {

    /**
     * Each row in the imported dataset should map to one instance
     * of this class (with optional property translation
     * through {@link self::$columnMap}).
     *
     * @var string
     */
    public $objectClass;

    /**
     * Override this on subclasses to give the specific functions names.
     * Read by {@link self::Title()}.
     *
     * @var string
     */
    public static $title;

    /**
     * Map columns to DataObject-properties.
     * If not specified, we assume the first row
     * in the file contains the column headers.
     * The order of your array should match the column order.
     *
     * The column count should match the count of array elements,
     * fill with NULL values if you want to skip certain columns.
     *
     * You can also combine {@link $hasHeaderRow} = true and {@link $columnMap}
     * and omit the NULL values in your map.
     *
     * Supports one-level chaining of has_one relations and properties with dot notation
     * (e.g. Team.Title). The first part has to match a has_one relation name
     * (not necessarily the classname of the used relation).
     *
     * <code>
     * <?php
     * // simple example
     * array(
     *     'Title',
     *     'Birthday'
     * )
     *
     * // complex example
     * array(
     *     'first name' => 'FirstName', // custom column name
     *     null, // ignored column
     *     'RegionID', // direct has_one/has_many ID setting
     *     'OrganisationTitle', // create has_one relation to existing record using $relationCallbacks
     *     'street' => 'Organisation.StreetName', // match an existing has_one or create one and write property.
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $columnMap = array();

    /**
     * Find a has_one relation based on a specific column value.
     *
     * <code>
     * <?php
     * array(
     *     'OrganisationTitle' => array(
     *         'relationname' => 'Organisation', // relation accessor name
     *         'callback' => 'getOrganisationByTitle',
     *     ),
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $relationCallbacks = array();

    /**
     * Specifies how to determine duplicates based on one or more provided fields
     * in the imported data, matching to properties on the used {@link DataObject} class.
     * Alternatively the array values can contain a callback method (see example for
     * implementation details). The callback method should be defined on the source class.
     *
     * NOTE: If you're trying to get a unique Member record by a particular field that
     * isn't Email, you need to ensure that Member is correctly set to the unique field
     * you want, as it will merge any duplicates during {@link Member::onBeforeWrite()}.
     *
     * {@see Member::$unique_identifier_field}.
     *
     * If multiple checks are specified, the first non-empty field "wins".
     *
     * <code>
     * <?php
     * array(
     *     'customernumber' => 'ID',
     *     'phonenumber' => array(
     *         'callback' => 'getByImportedPhoneNumber'
     *     )
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $duplicateChecks = array();

    /**
     * Delete ALL existing records of {@link self::$objectClass} before importing.
     * Checked by {@link self::load()}.
     *
     * @var boolean
     */
    public $deleteExistingRecords = false;

    /**
     * @param string $objectClass Name of the class each imported row maps to
     */
    public function __construct($objectClass) {
        $this->objectClass = $objectClass;
        parent::__construct();
    }

    /**
     * Load the given file via {@link self::processAll()} and {@link self::processRecord()}.
     * Optionally clears the existing records before it imports
     * (see {@link self::$deleteExistingRecords}).
     *
     * @param string $filepath Path to the file we're importing
     * @return BulkLoader_Result See {@link self::processAll()}
     */
    public function load($filepath) {
        // Imports can be long-running and memory hungry, so raise both limits up front.
        increase_time_limit_to(3600);
        increase_memory_limit_to('512M');

        // Remove every existing record of the imported class when requested.
        if($this->deleteExistingRecords) {
            DataObject::get($this->objectClass)->removeAll();
        }

        return $this->processAll($filepath);
    }

    /**
     * Preview a file import (don't write anything to the database).
     * Useful to analyze the input and give the users a chance to influence
     * it through a UI.
     *
     * Not implemented yet: calling this currently raises an E_USER_ERROR.
     *
     * @todo Implement preview()
     *
     * @param string $filepath Absolute path to the file we're importing
     * @return array See {@link self::processAll()}
     */
    public function preview($filepath) {
        user_error("BulkLoader::preview(): Not implemented", E_USER_ERROR);
    }

    /**
     * Process every record in the file
     *
     * @param string $filepath Absolute path to the file we're importing (with UTF8 content)
     * @param boolean $preview If true, we'll just output a summary of changes but not actually do anything
     * @return BulkLoader_Result A collection of objects which are either created, updated or deleted.
     * 'message': free-text string that can optionally provide some more information about what changes
     * have been made.
     */
    abstract protected function processAll($filepath, $preview = false);

    /**
     * Process a single record from the file.
     *
     * @param array $record A map of the data, keyed by the header field defined in {@link self::$columnMap}
     * @param array $columnMap
     * @param BulkLoader_Result $result (passed as reference)
     * @param boolean $preview
     */
    abstract protected function processRecord($record, $columnMap, &$result, $preview = false);

    /**
     * Return a FieldList containing all the options for this form; this
     * doesn't include the actual upload field itself.
     *
     * The base implementation is empty and therefore returns nothing.
     */
    public function getOptionFields() {}

    /**
     * Return a human-readable name for this object.
     * It defaults to the class name, and can be overridden by setting the static variable $title.
     *
     * @return string
     */
    public function Title() {
        $title = $this->stat('title');

        // Any falsy title (unset, empty string) falls back to the class name.
        return $title ? $title : $this->class;
    }

    /**
     * Get a specification of all available columns and relations on the used model.
     * Useful for generation of spec documents for technical end users.
     *
     * Return Format:
     * <code>
     * array(
     *     'fields' => array('myFieldName'=>'myDescription'),
     *     'relations' => array('myRelationName'=>'myDescription'),
     * )
     * </code>
     *
     * @todo Mix in custom column mappings
     *
     * @return array
     */
    public function getImportSpec() {
        $model = singleton($this->objectClass);
        $spec = array();

        // Get database columns (fieldlabels include fieldname as a key),
        // passing the $includerelations flag as false so that it only contains $db fields.
        $spec['fields'] = (array)$model->fieldLabels(false);

        $has_ones = $model->hasOne();
        $has_manys = $model->hasMany();
        $many_manys = $model->manyMany();

        // Array union: on a duplicate relation name the earlier list wins.
        $spec['relations'] = (array)$has_ones + (array)$has_manys + (array)$many_manys;

        return $spec;
    }

    /**
     * Determines if a specific field is null.
     * Can be useful for unusual "empty" flags in the file,
     * e.g. a "(not set)" value.
     * The usual {@link DBField::isNull()} checks apply when writing the {@link DataObject},
     * so this is mainly a customization method.
     *
     * @param mixed $val
     * @param string $fieldName Name of the field as specified in the array-values for {@link self::$columnMap}.
     *                          Unused by this default implementation.
     * @return boolean
     */
    protected function isNullValue($val, $fieldName = null) {
        // empty() also matches the string '0', which is a legitimate value, so exclude it explicitly.
        return (empty($val) && $val !== '0');
    }

}
/**
 * Encapsulates the result of a {@link BulkLoader} import
 * (usually through the {@link BulkLoader->processAll()} method).
 *
 * @todo Refactor to support lazy-loaded DataObjectSets once they are implemented.
 *
 * @package framework
 * @subpackage bulkloading
 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
 */
class BulkLoader_Result extends Object {

    /**
     * @var array Stores a map of ID and ClassNames
     * which can be reconstructed to DataObjects.
     * As imports can get large we just store enough
     * information to reconstruct the objects on demand.
     * Optionally includes a status message specific to
     * the import of this object. When the objects are reconstructed,
     * this message is exposed through the custom object property "_BulkLoaderMessage".
     *
     * Example:
     * <code>
     * array(array('ID'=>1, 'ClassName'=>'Member', 'Message'=>'Updated existing record based on ParentID relation'))
     * </code>
     */
    protected $created = array();

    /**
     * @var array (see {@link $created})
     */
    protected $updated = array();

    /**
     * @var array One full field map per deleted object (taken from toMap()),
     * plus a "_BulkLoaderMessage" key. See {@link self::addDeleted()}.
     */
    protected $deleted = array();

    /**
     * Stores the last change.
     * It is in the same format as {@link $created} but with an additional key, "ChangeType", which will be set to
     * one of 3 strings: "created", "updated", or "deleted"
     *
     * @var array
     */
    protected $lastChange = array();

    /**
     * Returns the count of all objects which were
     * created or updated.
     *
     * @return int
     */
    public function Count() {
        return count($this->created) + count($this->updated);
    }

    /**
     * @return int Number of created objects
     */
    public function CreatedCount() {
        return count($this->created);
    }

    /**
     * @return int Number of updated objects
     */
    public function UpdatedCount() {
        return count($this->updated);
    }

    /**
     * @return int Number of deleted objects
     */
    public function DeletedCount() {
        return count($this->deleted);
    }

    /**
     * Returns all created objects. Each object might
     * contain specific importer feedback in the "_BulkLoaderMessage" property.
     *
     * @return ArrayList
     */
    public function Created() {
        return $this->mapToArrayList($this->created);
    }

    /**
     * Returns all updated objects (see {@link self::Created()}).
     *
     * @return ArrayList
     */
    public function Updated() {
        return $this->mapToArrayList($this->updated);
    }

    /**
     * Returns the deleted objects. These are built from the field maps
     * recorded in {@link self::addDeleted()} and wrapped in ArrayData,
     * rather than being looked up again.
     *
     * @return ArrayList
     */
    public function Deleted() {
        $set = new ArrayList();
        foreach($this->deleted as $arrItem) {
            $set->push(ArrayData::create($arrItem));
        }

        return $set;
    }

    /**
     * Returns the last change.
     * It is in the same format as {@link $created} but with an additional key, "ChangeType", which will be set to
     * one of 3 strings: "created", "updated", or "deleted"
     *
     * @return array
     */
    public function LastChange() {
        return $this->lastChange;
    }

    /**
     * Record a created object and remember it as the last change.
     *
     * @param DataObject $obj
     * @param string $message
     */
    public function addCreated($obj, $message = null) {
        $change = array(
            'ID' => $obj->ID,
            'ClassName' => $obj->class,
            'Message' => $message
        );
        $this->created[] = $change;

        // Only the "last change" copy carries the ChangeType marker.
        $change['ChangeType'] = 'created';
        $this->lastChange = $change;
    }

    /**
     * Record an updated object and remember it as the last change.
     *
     * @param DataObject $obj
     * @param string $message
     */
    public function addUpdated($obj, $message = null) {
        $change = array(
            'ID' => $obj->ID,
            'ClassName' => $obj->class,
            'Message' => $message
        );
        $this->updated[] = $change;

        // Only the "last change" copy carries the ChangeType marker.
        $change['ChangeType'] = 'updated';
        $this->lastChange = $change;
    }

    /**
     * Record a deleted object and remember it as the last change.
     * The full field map is stored, with the message under "_BulkLoaderMessage".
     *
     * @param DataObject $obj
     * @param string $message
     */
    public function addDeleted($obj, $message = null) {
        $data = $obj->toMap();
        $data['_BulkLoaderMessage'] = $message;
        $this->deleted[] = $data;

        // Only the "last change" copy carries the ChangeType marker.
        $data['ChangeType'] = 'deleted';
        $this->lastChange = $data;
    }

    /**
     * Rebuild the objects described by a list of ID/ClassName maps.
     * Entries whose record can no longer be found are skipped.
     *
     * @param array $arr Array containing ID and ClassName maps
     * @return ArrayList
     */
    protected function mapToArrayList($arr) {
        $set = new ArrayList();
        foreach($arr as $arrItem) {
            $obj = DataObject::get_by_id($arrItem['ClassName'], $arrItem['ID']);

            // Check the lookup result before touching it: setting a property
            // on a missing record would otherwise fail on a non-object.
            if(!$obj) continue;

            $obj->_BulkLoaderMessage = $arrItem['Message'];
            $set->push($obj);
        }

        return $set;
    }

    /**
     * Merges another BulkLoader_Result into this one.
     * Only the created, updated and deleted lists are combined;
     * the last change of this result is left as it is.
     *
     * @param BulkLoader_Result $other
     */
    public function merge(BulkLoader_Result $other) {
        $this->created = array_merge($this->created, $other->created);
        $this->updated = array_merge($this->updated, $other->updated);
        $this->deleted = array_merge($this->deleted, $other->deleted);
    }

}