NEW Add migration task and documentation for 1.x to 2.x upgrade

This commit is contained in:
Robbie Averill 2017-05-08 15:03:05 +12:00
parent ed6f805574
commit aafcc35f6c
4 changed files with 555 additions and 0 deletions

View File

@ -0,0 +1,249 @@
<?php
/**
 * This build task helps to migrate DMS data structures from DMS 1.x to 2.x which introduces document sets.
 *
 * The task performs one step per invocation, selected with the "action" request variable:
 *
 *  - "create-default-document-set": adds a "Default" document set to every documents-enabled page without one
 *  - "reassign-documents": copies the legacy DMSDocument_Pages relations onto each page's first document set
 *
 * Passing a truthy "dryrun" request variable (e.g. "dryrun=1") reports what would happen without writing.
 *
 * See the "document-sets.md" migration guide for more information and use examples.
 */
class MigrateToDocumentSetsTask extends BuildTask
{
    protected $title = 'DMS 2.0 Migration Tool';

    protected $description = 'Migration tool for upgrading from DMS 1.x to 2.x. Add "action=create-default-document-set" to create a default set. "reassign-documents" to reassign legacy document relations. "dryrun=1" to show changes without writing.';

    /**
     * The valid actions that this task can perform (and the method that does them as the key)
     * @var array
     */
    protected $validActions = array(
        'createDefaultSet' => 'create-default-document-set',
        'reassignDocuments' => 'reassign-documents'
    );

    /**
     * The request the task is currently running for; assigned at the start of {@link run()}
     *
     * @var SS_HTTPRequest
     */
    protected $request;

    /**
     * Holds number of pages/sets/documents processed for output at the end. Example:
     *
     * <code>
     * array(
     *     'total-pages' => 0,
     *     'pages-updated' => 0
     * )
     * </code>
     *
     * The individual action methods will update these metrics as required
     *
     * @var array
     */
    protected $results = array();

    /**
     * Validates the requested action, runs it, then prints the collected result metrics.
     *
     * If the "action" request variable is missing or not one of {@link $validActions}, an error listing the
     * valid actions is printed and nothing else happens.
     *
     * @param SS_HTTPRequest $request
     */
    public function run($request)
    {
        $this->request = $request;
        // Start from a clean slate so that re-running the same task instance doesn't report earlier metrics
        $this->results = array();

        $action = $request->getVar('action');

        // Strict lookup: with loose comparison a non-string value such as boolean true would match the first
        // valid action and silently run it.
        $method = is_string($action) ? array_search($action, $this->validActions, true) : false;

        if ($method === false) {
            $this->output(
                'Error! Specified action is not valid. Valid actions are: ' . implode(', ', $this->validActions)
            );
            $this->output('You can add "dryrun=1" to enable dryrun mode where no changes will be written to the DB.');
            return;
        }

        $this->outputHeader();
        $this->$method();
        $this->outputResults();
    }

    /**
     * Returns whether dryrun mode is enabled.
     *
     * Any truthy "dryrun" request variable enables it ("dryrun=1" is the documented form). Must only be called
     * once {@link run()} has assigned the request.
     *
     * @return bool
     */
    public function isDryrun()
    {
        return (bool) $this->request->getVar('dryrun');
    }

    /**
     * Creates a default document set for any valid page that doesn't have one
     *
     * In dryrun mode the pages are still counted as "Default document set added", but nothing is written.
     *
     * @return $this
     */
    protected function createDefaultSet()
    {
        foreach (SiteTree::get() as $page) {
            // Only handle valid page types
            if (!$page->config()->get('documents_enabled')) {
                $this->addResult('Skipped: documents disabled');
                continue;
            }

            // Don't add a set if it already has one
            if ($page->DocumentSets()->count()) {
                $this->addResult('Skipped: already has a set');
                continue;
            }

            $this->addDefaultDocumentSet($page);
            $this->addResult('Default document set added');
        }

        return $this;
    }

    /**
     * Reassign documents to the default document set, where they'd previously have been assigned to pages
     *
     * Reads the legacy "DMSDocument_Pages" join table directly, since the ORM relation that used it no longer
     * exists in 2.x. Each row is linked to the first document set of its page.
     *
     * @return $this
     */
    protected function reassignDocuments()
    {
        // The legacy table only exists on sites upgraded from 1.x; querying it when it is absent would raise a
        // database error, so treat a missing table the same as an empty one.
        if (!DB::get_schema()->hasTable('DMSDocument_Pages')
            || !SQLSelect::create('*', 'DMSDocument_Pages')->count()
        ) {
            $this->output('There was no data to migrate. Finishing.');
            return $this;
        }

        $rows = SQLSelect::create(array('DMSDocumentID', 'SiteTreeID'), 'DMSDocument_Pages')->execute();

        foreach ($rows as $row) {
            $document = DMSDocument::get()->byId($row['DMSDocumentID']);
            if (!$document) {
                $this->addResult('Skipped: document does not exist');
                continue;
            }

            $page = SiteTree::get()->byId($row['SiteTreeID']);
            if (!$page) {
                $this->addResult('Skipped: page does not exist');
                continue;
            }

            // Don't try and process pages that don't have a document set. This should be created by the first
            // action step in this build task, so shouldn't occur if run in correct order.
            $set = $page->DocumentSets()->first();
            if (!$set) {
                $this->addResult('Skipped: no default document set');
                continue;
            }

            $this->addDocumentToSet($document, $set);
            $this->addResult('Reassigned to document set');
        }

        return $this;
    }

    /**
     * Create a "default" document set and add it to the given Page via the ORM relationship added by
     * {@link DMSSiteTreeExtension}
     *
     * Does nothing in dryrun mode.
     *
     * @param SiteTree $page
     * @return $this
     */
    protected function addDefaultDocumentSet(SiteTree $page)
    {
        if ($this->isDryrun()) {
            return $this;
        }

        $set = DMSDocumentSet::create();
        $set->Title = 'Default';
        $set->write();

        $page->DocumentSets()->add($set);

        return $this;
    }

    /**
     * Add the given document to the given document set
     *
     * Does nothing in dryrun mode.
     *
     * @param DMSDocument $document
     * @param DMSDocumentSet $set
     * @return $this
     */
    protected function addDocumentToSet(DMSDocument $document, DMSDocumentSet $set)
    {
        if ($this->isDryrun()) {
            return $this;
        }

        $set->Documents()->add($document);

        return $this;
    }

    /**
     * Output a header info line, plus a notice when dryrun mode is enabled
     *
     * @return $this
     */
    protected function outputHeader()
    {
        $this->output('Migrating DMS data to 2.x for document sets');

        if ($this->isDryrun()) {
            $this->output('NOTE: Dryrun mode enabled. No changes will be written.');
        }

        return $this;
    }

    /**
     * Output a "finished" notice and the results of what was done
     *
     * @return $this
     */
    protected function outputResults()
    {
        $this->output();
        $this->output('Finished:');

        foreach ($this->results as $metric => $count) {
            $this->output('+ ' . $metric . ': ' . $count);
        }

        return $this;
    }

    /**
     * Add the $increment to the result key identified by $key
     *
     * @param string $key
     * @param int $increment
     * @return $this
     */
    protected function addResult($key, $increment = 1)
    {
        if (!array_key_exists($key, $this->results)) {
            $this->results[$key] = 0;
        }

        $this->results[$key] += $increment;

        return $this;
    }

    /**
     * Outputs a message formatted either for CLI or browser output
     *
     * The message is followed by PHP_EOL on the command line and by "<br />" otherwise.
     *
     * @param string $message
     * @return $this
     */
    public function output($message = '')
    {
        if ($this->isCli()) {
            echo $message, PHP_EOL;
        } else {
            echo $message . '<br />';
        }

        return $this;
    }

    /**
     * Returns whether the task is called via CLI or not
     *
     * Kept as its own method so that tests can mock it.
     *
     * @return bool
     */
    protected function isCli()
    {
        return Director::is_cli();
    }
}

View File

@ -0,0 +1,142 @@
# Migrating to use Document Sets
> **Warning!** Please ensure you take a backup of your database before performing any of these migration task steps.
Version 2.0.0 of the DMS module introduces document sets as the containing relationship for pages and documents. In
previous versions of DMS the relationship was between pages and documents directly.
If you are migrating from an earlier version of DMS to 2.x, you will need to set up new document sets for each page
that contained documents, and replace the old document-to-page links with the new page-to-document-set and
document-set-to-document links.
We have included a migration build task that you can use to automate this process. It can be accessed via
`/dev/tasks/MigrateToDocumentSetsTask`, and provides the following steps in the migration process, each run as a separate action:
* Create a default document set for all valid pages (see note)
* Re-assign documents to their original page's new document set
## Using the migration build task
### Enabling dry run mode
For either of the "actions" in this build task, you can enable dry run mode to see what the results will be without
it actually writing anything in the database. We advise you do this as a first step.
You can enable dryrun mode by adding `dryrun=1` as an argument.
Example output will contain the following when dryrun mode is enabled:
```plain
NOTE: Dryrun mode enabled. No changes will be written.
```
### 1. Create a default document set
The first step of the migration build task will find all pages that do not have documents disabled (see note) and will
create a document set called "Default" if one does not already exist. In the case where a document set already exists
for a page, it will be used as the default.
Run from command line:
```plain
sake dev/tasks/MigrateToDocumentSetsTask action=create-default-document-set
```
Run from a browser:
```plain
http://yoursite.dev/dev/tasks/MigrateToDocumentSetsTask?action=create-default-document-set
```
An example output from this task might look like this:
```plain
Running Task DMS 2.0 Migration Tool
Migrating DMS data to 2.x for document sets
Finished:
+ Default document set added: 6
+ Skipped: documents disabled: 1
```
This task will only write records for those that are needed. If you run it more than once it will simply not do
anything.
### 2. Re-assign documents
> **Note!** If you want to choose specific document sets for documents to be assigned to rather than just the first
belonging to a page, you will need to run these queries manually (see further in this document).
The second step in the migration task is to change the relationship from "page to documents" to "document set to
documents". This task assumes that the original relationship data is still present in the database, since SilverStripe
will not remove old columns from the database tables once they've been made obsolete.
Run from command line:
```plain
sake dev/tasks/MigrateToDocumentSetsTask action=reassign-documents
```
Run from a browser:
```plain
http://yoursite.dev/dev/tasks/MigrateToDocumentSetsTask?action=reassign-documents
```
An example output from this task might look like this:
```plain
Running Task DMS 2.0 Migration Tool
Migrating DMS data to 2.x for document sets
Finished:
+ Reassigned to document set: 4
```
This task will show the same output on the initial and subsequent runs. You can follow the instructions below to clean
up legacy data after you've validated that everything is working correctly if you'd like to.
## Cleanup
Since SilverStripe will not remove the old obsolete relationship table from the database, you can remove it manually
if required. Only do this once you've validated that everything has been migrated correctly.
```sql
DROP TABLE `your_ss_database`.`DMSDocument_Pages`;
```
## Migrating data manually
As mentioned earlier, if you need to migrate data manually for one reason or another you can do so with a couple of
manual SQL queries to the database.
One example of why you may need to do this is if you don't want your documents to
be automatically assigned to the "default" document set on a page, but would prefer to choose a specific set to assign
to. The automated build task cannot make this decision for you, but you can run some queries yourself.
In DMS 1.x the relationship of documents to pages is stored in the `DMSDocument_Pages` table. If you run an explain
query you will see some obviously named foreign key columns for `DMSDocumentID` and `SiteTreeID`.
In DMS 2.x the relationship is of document _sets_ to documents, and is stored in `DMSDocumentSet_Documents`.
How you manipulate this data is up to you, but an example might be that you want to move a certain range of documents
by their IDs into a specific document set (by its ID), so you could run the following:
```sql
-- Insert the new records
INSERT INTO `your_ss_database`.`DMSDocumentSet_Documents`
(`DMSDocumentSetID`, `DMSDocumentID`)
SELECT
-- your document set ID
123,
`ID`
FROM `your_ss_database`.`DMSDocument` WHERE `ID` IN(1, 2, 3, 4); -- your document IDs
```
## Notes
> Create a default document set for all valid pages
"Valid pages" means that the page class does not have the `documents_enabled` configuration property set to `false`.

View File

@ -0,0 +1,142 @@
<?php
class MigrateToDocumentSetsTaskTest extends SapphireTest
{
    protected static $fixture_file = 'MigrateToDocumentSetsTaskTest.yml';

    /**
     * The task's output() helper should end each message with a newline on the CLI and "<br />" in a browser
     *
     * @param bool $isCli
     * @param string $expected
     * @dataProvider outputProvider
     */
    public function testCanOutputToCliOrBrowser($isCli, $expected)
    {
        $task = $this->getMockBuilder('MigrateToDocumentSetsTask')
            ->setMethods(array('isCli'))
            ->getMock();

        // One isCli() check is expected per message that is output
        $task->expects($this->exactly(2))
            ->method('isCli')
            ->will($this->returnValue($isCli));

        ob_start();
        $task->output('Test');
        $task->output('Test line 2');
        $captured = ob_get_clean();

        $this->assertSame($expected, $captured);
    }

    /**
     * Data sets of: whether the CLI is mocked as active, and the exact output expected for the two test lines
     *
     * @return array[]
     */
    public function outputProvider()
    {
        $cliCase = array(true, 'Test' . PHP_EOL . 'Test line 2' . PHP_EOL);
        $browserCase = array(false, 'Test<br />Test line 2<br />');

        return array($cliCase, $browserCase);
    }

    /**
     * An unknown action should produce an error message
     */
    public function testShowErrorOnInvalidAction()
    {
        $output = $this->runTask(array('action' => 'coffeetime'));

        $this->assertContains('Error! Specified action is not valid.', $output);
    }

    /**
     * Pages without a document set should be given a default one; pages that have one should be left alone
     */
    public function testCreateDefaultDocumentSets()
    {
        $this->fixtureOldRelations();

        $output = $this->runTask(array('action' => 'create-default-document-set'));
        $this->assertContains('Finished', $output);

        // Of the four fixture pages, one already has a document set and must be skipped
        $this->assertContains('Default document set added: 3', $output);
        $this->assertContains('Skipped: already has a set: 1', $output);

        // Spot check the relationship records that were written
        $pageOneSets = $this->objFromFixture('SiteTree', 'one')->getDocumentSets();
        $this->assertCount(1, $pageOneSets);
        $this->assertSame('Default', $pageOneSets->first()->Title);

        $pageTwoSets = $this->objFromFixture('SiteTree', 'two')->getDocumentSets();
        $this->assertCount(1, $pageTwoSets);

        // A second pass (here in dryrun mode) finds that every page already has a set, so does nothing
        $output = $this->runTask(array('action' => 'create-default-document-set', 'dryrun' => '1'));
        $this->assertContains('Skipped: already has a set: 4', $output);
        $this->assertContains('NOTE: Dryrun mode enabled', $output);
    }

    /**
     * Legacy page -> document relations should be migrated to page -> document set -> document
     */
    public function testReassignDocumentsToFirstSet()
    {
        $this->fixtureOldRelations();

        // The default sets need to exist before documents can be reassigned to them
        $this->runTask(array('action' => 'create-default-document-set'));

        // Dryrun pass
        $dryrunOutput = $this->runTask(array('action' => 'reassign-documents', 'dryrun' => '1'));
        $this->assertContains('NOTE: Dryrun mode enabled', $dryrunOutput);
        $this->assertContains('Reassigned to document set: 3', $dryrunOutput);

        // Real pass
        $liveOutput = $this->runTask(array('action' => 'reassign-documents'));
        $this->assertNotContains('NOTE: Dryrun mode enabled', $liveOutput);
        $this->assertContains('Reassigned to document set: 3', $liveOutput);

        // Smoke test the ORM: fixture page name => number of documents expected on it
        $expectedDocumentCounts = array('one' => 1, 'two' => 1, 'four' => 0);
        foreach ($expectedDocumentCounts as $pageName => $expectedCount) {
            $this->assertCount($expectedCount, $this->objFromFixture('SiteTree', $pageName)->getAllDocuments());
        }
    }

    /**
     * Runs the task with the given GET variables and returns everything it echoed
     *
     * @param array $getVars
     * @return string Task output
     */
    protected function runTask(array $getVars)
    {
        $request = new SS_HTTPRequest('GET', '/', $getVars);
        $task = new MigrateToDocumentSetsTask();

        ob_start();
        $task->run($request);

        return ob_get_clean();
    }

    /**
     * Builds the legacy many many join table (if needed) and links fixture documents one/two/three to the
     * fixture pages of the same name
     */
    protected function fixtureOldRelations()
    {
        $legacyTable = 'DMSDocument_Pages';

        if (!DB::get_schema()->hasTable($legacyTable)) {
            DB::create_table($legacyTable, array(
                'DMSDocumentID' => 'int(11) null',
                'SiteTreeID' => 'int(11) null'
            ));
        }

        $factory = $this->getFixtureFactory();
        $documentIds = $factory->getIds('DMSDocument');
        $pageIds = $factory->getIds('SiteTree');

        foreach (array('one', 'two', 'three') as $name) {
            $relation = array(
                'DMSDocumentID' => $documentIds[$name],
                'SiteTreeID' => $pageIds[$name]
            );
            $factory->createRaw($legacyTable, 'rln_' . $name, $relation);
        }
    }
}

View File

@ -0,0 +1,22 @@
# Fixtures for migration task testing. The relationships for them are
# created manually in the unit test class.
#
# Documents one, two and three are linked to the pages of the same name through
# the legacy DMSDocument_Pages table, which the unit test class builds itself.
DMSDocument:
one:
Title: document1
two:
Title: document2
three:
Title: document3
# A pre-existing document set, attached to page four below.
DMSDocumentSet:
four:
Title: documentSet4
# Pages one, two and three start without a document set. Page four already has
# one, so the task's "already has a set" skip path is exercised.
SiteTree:
one:
Title: page1
two:
Title: page2
three:
Title: page3
four:
Title: page4
DocumentSets: =>DMSDocumentSet.four