<?php
/**
 * A base for bulk loaders of content into the SilverStripe database.
 * Bulk loaders give SilverStripe authors the ability to do large-scale uploads into their SilverStripe databases.
 *
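 * You can configure column handling through {@link BulkLoader::$columnMap},
 * {@link BulkLoader::$relationCallbacks} and {@link BulkLoader::$duplicateChecks}.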
 *
 * @todo Add support for adding/editing has_many relations.
 * @todo Add support for deep chaining of relation properties (e.g. Player.Team.Stats.GoalCount)
 * @todo Character conversion
 *
 * @see http://tools.ietf.org/html/rfc4180
 * @package framework
 * @subpackage bulkloading
 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
 */
abstract class BulkLoader extends ViewableData {

	/**
	 * Name of the class that every row of the imported dataset is mapped to
	 * (one instance per row, with optional property translation
	 * through {@link self::$columnMap}).
	 *
	 * @var string
	 */
	public $objectClass;

	/**
	 * Human-readable name of the loader, returned by {@link Title()}.
	 * Override this on subclasses to give each specific loader its own name.
	 *
	 * @var string
	 */
	public static $title;

	/**
	 * Maps columns of the import file to DataObject properties.
	 * If not specified, we assume the first row
	 * in the file contains the column headers.
	 * The order of the array should match the column order.
	 *
	 * The number of array elements should match the column count;
	 * fill with NULL values if you want to skip certain columns.
	 *
	 * You can also combine {@link $hasHeaderRow} = true with {@link $columnMap}
	 * and omit the NULL values in your map.
	 *
	 * Supports one-level chaining of has_one relations and properties with dot notation
	 * (e.g. Team.Title). The first part has to match a has_one relation name
	 * (not necessarily the classname of the used relation).
	 *
	 * <code>
	 * <?php
	 * // simple example
	 * array(
	 * 	'Title',
	 * 	'Birthday'
	 * )
	 *
	 * // complex example
	 * array(
	 * 	'first name' => 'FirstName', // custom column name
	 * 	null, // ignored column
	 * 	'RegionID', // direct has_one/has_many ID setting
	 * 	'OrganisationTitle', // create has_one relation to existing record using $relationCallbacks
	 * 	'street' => 'Organisation.StreetName', // match an existing has_one or create one and write property.
	 * );
	 * ?>
	 * </code>
	 *
	 * @var array
	 */
	public $columnMap = array();

	/**
	 * Resolves a has_one relation from the value of a specific column.
	 *
	 * <code>
	 * <?php
	 * array(
	 * 	'OrganisationTitle' => array(
	 * 		'relationname' => 'Organisation', // relation accessor name
	 * 		'callback' => 'getOrganisationByTitle',
	 * 	),
	 * );
	 * ?>
	 * </code>
	 *
	 * @var array
	 */
	public $relationCallbacks = array();

	/**
	 * Specifies how to determine duplicates based on one or more provided fields
	 * in the imported data, matching to properties on the used {@link DataObject} class.
	 * Alternatively the array values can contain a callback method (see example for
	 * implementation details). The callback method should be defined on the source class.
	 *
	 * NOTE: If you're trying to get a unique Member record by a particular field that
	 * isn't Email, you need to ensure that Member is correctly set to the unique field
	 * you want, as it will merge any duplicates during {@link Member::onBeforeWrite()}.
	 *
	 * {@see Member::$unique_identifier_field}.
	 *
	 * If multiple checks are specified, the first non-empty field "wins".
	 *
	 * <code>
	 * <?php
	 * array(
	 * 	'customernumber' => 'ID',
	 * 	'phonenumber' => array(
	 * 		'callback' => 'getByImportedPhoneNumber'
	 * 	)
	 * );
	 * ?>
	 * </code>
	 *
	 * @var array
	 */
	public $duplicateChecks = array();

	/**
	 * When true, {@link load()} removes ALL existing records of
	 * {@link $objectClass} before importing.
	 *
	 * @var boolean
	 */
	public $deleteExistingRecords = false;

	/**
	 * @param string $objectClass Name of the class imported rows are mapped to,
	 *                            stored in {@link $objectClass}
	 */
	public function __construct($objectClass) {
		$this->objectClass = $objectClass;
		parent::__construct();
	}

	/**
	 * Load the given file via {@link self::processAll()} and {@link self::processRecord()}.
	 * Optionally clears all existing records of {@link $objectClass} before it imports
	 * (see {@link $deleteExistingRecords}).
	 *
	 * @param string $filepath Path to the file we're importing
	 * @return BulkLoader_Result See {@link self::processAll()}
	 */
	public function load($filepath) {
		// Raise the script's time and memory limits before starting the import
		increase_time_limit_to(3600);
		increase_memory_limit_to('512M');

		// Wipe every existing instance of the imported class if requested
		if($this->deleteExistingRecords) {
			$existingRecords = DataObject::get($this->objectClass);
			$existingRecords->removeAll();
		}

		$result = $this->processAll($filepath);

		return $result;
	}

	/**
	 * Preview a file import (don't write anything to the database).
	 * Useful to analyze the input and give the users a chance to influence
	 * it through a UI.
	 *
	 * Currently not implemented: calling it raises an E_USER_ERROR.
	 *
	 * @todo Implement preview()
	 *
	 * @param string $filepath Absolute path to the file we're importing
	 * @return array See {@link self::processAll()}
	 */
	public function preview($filepath) {
		user_error("BulkLoader::preview(): Not implemented", E_USER_ERROR);
	}

	/**
	 * Process every record in the file.
	 *
	 * @param string $filepath Absolute path to the file we're importing (with UTF8 content)
	 * @param boolean $preview If true, we'll just output a summary of changes but not actually do anything
	 * @return BulkLoader_Result A collection of objects which are either created, updated or deleted.
	 * 'message': free-text string that can optionally provide some more information about what changes have
	 */
	abstract protected function processAll($filepath, $preview = false);


	/**
	 * Process a single record from the file.
	 *
	 * @param array $record A map of the data, keyed by the header field defined in {@link self::$columnMap}
	 * @param array $columnMap
	 * @param BulkLoader_Result $result (passed as reference)
	 * @param boolean $preview
	 */
	abstract protected function processRecord($record, $columnMap, &$result, $preview = false);

	/**
	 * Return a FieldList containing all the options for this form; this
	 * doesn't include the actual upload field itself.
	 *
	 * The base implementation has an empty body and therefore returns nothing.
	 */
	public function getOptionFields() {}

	/**
	 * Return a human-readable name for this object.
	 * It defaults to the class name and can be overridden by setting
	 * the static variable {@link $title}.
	 *
	 * @return string
	 */
	public function Title() {
		$configuredTitle = $this->stat('title');
		if($configuredTitle) return $configuredTitle;

		return $this->class;
	}

	/**
	 * Get a specification of all available columns and relations on the used model.
	 * Useful for generation of spec documents for technical end users.
	 *
	 * Return Format:
	 * <code>
	 * array(
	 *   'fields' => array('myFieldName'=>'myDescription'),
	 *   'relations' => array('myRelationName'=>'myDescription'),
	 * )
	 * </code>
	 *
	 * @todo Mix in custom column mappings
	 *
	 * @return array
	 **/
	public function getImportSpec() {
		$model = singleton($this->objectClass);

		// Field labels are keyed by field name; passing false for the
		// include-relations flag restricts them to $db fields only.
		$fields = (array)$model->fieldLabels(false);

		// Array union keeps the first occurrence of a key, so has_one entries
		// take precedence over has_many, which take precedence over many_many.
		$relations = (array)$model->hasOne();
		$relations += (array)$model->hasMany();
		$relations += (array)$model->manyMany();

		return array(
			'fields' => $fields,
			'relations' => $relations,
		);
	}

	/**
	 * Determines if a specific field is null.
	 * Can be useful for unusual "empty" flags in the file,
	 * e.g. a "(not set)" value.
	 * The usual {@link DBField::isNull()} checks apply when writing the {@link DataObject},
	 * so this is mainly a customization method.
	 *
	 * The default implementation treats everything PHP's empty() considers
	 * empty as null, except for the string '0'.
	 *
	 * @param mixed $val
	 * @param string $fieldName Name of the field as specified in the array-values for {@link self::$columnMap}.
	 *                          Not used by the default implementation.
	 * @return boolean
	 */
	protected function isNullValue($val, $fieldName = null) {
		// The string '0' is a legitimate value even though empty() reports it as empty
		if($val === '0') return false;

		return empty($val);
	}

}

/**
 * Encapsulates the result of a {@link BulkLoader} import
 * (usually through the {@link BulkLoader->processAll()} method).
 *
 * @todo Refactor to support lazy-loaded DataObjectSets once they are implemented.
 *
 * @package framework
 * @subpackage bulkloading
 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
 */
class BulkLoader_Result extends Object {

	/**
	 * @var array Stores a list of maps with ID and ClassName
	 * which can be reconstructed to DataObjects.
	 * As imports can get large we just store enough
	 * information to reconstruct the objects on demand.
	 * Optionally includes a status message specific to
	 * the import of this object under the "Message" key. When the objects are
	 * reconstructed (see {@link Created()}), the message is exposed
	 * in a custom object property "_BulkLoaderMessage".
	 *
	 * Example:
	 * <code>
	 * array(array('ID'=>1, 'ClassName'=>'Member', 'Message'=>'Updated existing record based on ParentID relation'))
	 * </code>
	 */
	protected $created = array();

	/**
	 * @var array (see {@link $created})
	 */
	protected $updated = array();

	/**
	 * @var array List of maps, one per deleted object. Unlike {@link $created},
	 * each entry holds the complete data of the object as returned by its toMap()
	 * method, plus the optional status message under the "_BulkLoaderMessage" key.
	 */
	protected $deleted = array();

	/**
	 * Stores the last change.
	 * It is in the same format as the entries of {@link $created} (or of {@link $deleted}
	 * for deletions) but with an additional key, "ChangeType", which will be set to
	 * one of 3 strings: "created", "updated", or "deleted"
	 *
	 * @var array
	 */
	protected $lastChange = array();

	/**
	 * Returns the count of all objects which were
	 * created or updated.
	 *
	 * @return int
	 */
	public function Count() {
		return count($this->created) + count($this->updated);
	}

	/**
	 * @return int Number of created objects
	 */
	public function CreatedCount() {
		return count($this->created);
	}

	/**
	 * @return int Number of updated objects
	 */
	public function UpdatedCount() {
		return count($this->updated);
	}

	/**
	 * @return int Number of deleted objects
	 */
	public function DeletedCount() {
		return count($this->deleted);
	}

	/**
	 * Returns all created objects. Each object might
	 * contain specific importer feedback in the "_BulkLoaderMessage" property.
	 *
	 * @return ArrayList
	 */
	public function Created() {
		return $this->mapToArrayList($this->created);
	}

	/**
	 * Returns all updated objects. Each object might
	 * contain specific importer feedback in the "_BulkLoaderMessage" property.
	 *
	 * @return ArrayList
	 */
	public function Updated() {
		return $this->mapToArrayList($this->updated);
	}

	/**
	 * Returns all deleted objects, wrapped in ArrayData built from the data
	 * that was captured in {@link addDeleted()} (nothing is looked up again).
	 *
	 * @return ArrayList
	 */
	public function Deleted() {
		$set = new ArrayList();
		foreach ($this->deleted as $arrItem) {
			$set->push(ArrayData::create($arrItem));
		}
		return $set;
	}

	/**
	 * Returns the last change.
	 * It is in the same format as the entries of {@link $created} (or of {@link $deleted}
	 * for deletions) but with an additional key, "ChangeType", which will be set to
	 * one of 3 strings: "created", "updated", or "deleted"
	 *
	 * @return array Empty array if no change has been recorded yet
	 */
	public function LastChange() {
		return $this->lastChange;
	}

	/**
	 * Records a created object and makes it the last change.
	 *
	 * @param $obj DataObject
	 * @param $message string
	 */
	public function addCreated($obj, $message = null) {
		$this->created[] = $this->lastChange = array(
			'ID' => $obj->ID,
			'ClassName' => $obj->class,
			'Message' => $message
		);
		// Only the last-change copy carries the change type, not the stored entry
		$this->lastChange['ChangeType'] = 'created';
	}

	/**
	 * Records an updated object and makes it the last change.
	 *
	 * @param $obj DataObject
	 * @param $message string
	 */
	public function addUpdated($obj, $message = null) {
		$this->updated[] = $this->lastChange = array(
			'ID' => $obj->ID,
			'ClassName' => $obj->class,
			'Message' => $message
		);
		// Only the last-change copy carries the change type, not the stored entry
		$this->lastChange['ChangeType'] = 'updated';
	}

	/**
	 * Records a deleted object and makes it the last change.
	 * The complete data of the object is stored rather than just ID and ClassName.
	 *
	 * @param $obj DataObject
	 * @param $message string
	 */
	public function addDeleted($obj, $message = null) {
		$data = $obj->toMap();
		$data['_BulkLoaderMessage'] = $message;
		$this->deleted[] = $this->lastChange = $data;
		// Only the last-change copy carries the change type, not the stored entry
		$this->lastChange['ChangeType'] = 'deleted';
	}

	/**
	 * Reconstructs the objects referenced by a list of ID/ClassName maps.
	 * Entries whose record cannot be found are skipped, so the returned
	 * list may be shorter than the input.
	 *
	 * @param $arr Array containing ID and ClassName maps
	 * @return ArrayList
	 */
	protected function mapToArrayList($arr) {
		$set = new ArrayList();
		foreach($arr as $arrItem) {
			$obj = DataObject::get_by_id($arrItem['ClassName'], $arrItem['ID']);

			// Check the lookup result before touching it: assigning a property
			// on a failed lookup would be a property write on a non-object.
			if(!$obj) continue;

			$obj->_BulkLoaderMessage = $arrItem['Message'];
			$set->push($obj);
		}

		return $set;
	}

	/**
	 * Merges another BulkLoader_Result into this one by appending its
	 * created, updated and deleted entries to the ones already stored here.
	 * The last change of this result ({@link LastChange()}) is left untouched.
	 *
	 * @param BulkLoader_Result $other
	 */
	public function merge(BulkLoader_Result $other) {
		$this->created = array_merge($this->created, $other->created);
		$this->updated = array_merge($this->updated, $other->updated);
		$this->deleted = array_merge($this->deleted, $other->deleted);
	}
}