mirror of
https://github.com/silverstripe/silverstripe-framework
synced 2024-10-22 14:05:37 +02:00
RFC Add chunk method to DataList to iterate over large dataset (#8940)
This commit is contained in:
parent
dcdc25500b
commit
6fc25e4e96
@ -10,7 +10,7 @@ Whenever using the ORM to fetch records or navigate relationships you will recei
|
|||||||
either [DataList](api:SilverStripe\ORM\DataList) or [RelationList](api:SilverStripe\ORM\RelationList). This object gives you the ability to iterate over each of the results or
|
either [DataList](api:SilverStripe\ORM\DataList) or [RelationList](api:SilverStripe\ORM\RelationList). This object gives you the ability to iterate over each of the results or
|
||||||
modify.
|
modify.
|
||||||
|
|
||||||
## Iterating over the list.
|
## Iterating over the list
|
||||||
|
|
||||||
[SS_List](api:SilverStripe\ORM\SS_List) implements `IteratorAggregate`, allowing you to loop over the instance.
|
[SS_List](api:SilverStripe\ORM\SS_List) implements `IteratorAggregate`, allowing you to loop over the instance.
|
||||||
|
|
||||||
@ -32,7 +32,7 @@ Or in the template engine:
|
|||||||
<% end_loop %>
|
<% end_loop %>
|
||||||
```
|
```
|
||||||
|
|
||||||
## Finding an item by value.
|
## Finding an item by value
|
||||||
|
|
||||||
```php
|
```php
|
||||||
// $list->find($key, $value);
|
// $list->find($key, $value);
|
||||||
@ -79,6 +79,42 @@ echo $members->column('Email');
|
|||||||
// ];
|
// ];
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Iterating over a large list {#chunkedFetch}
|
||||||
|
|
||||||
|
When iterating over a DataList, all DataObjects in the list will be loaded in memory. This can consume a lot of memory when working with a large data set.
|
||||||
|
|
||||||
|
To limit the number of DataObjects loaded in memory, you can use the `chunkedFetch()` method on your DataList. In most cases, you can iterate over the results of `chunkedFetch()` the same way you would iterate over your DataList. Internally, `chunkedFetch()` will split your DataList query into smaller queries and keep running through them until it runs out of results.
|
||||||
|
|
||||||
|
```php
|
||||||
|
$members = Member::get();
|
||||||
|
foreach ($members as $member) {
|
||||||
|
echo $member->Email;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This call will produce the same output, but it will use less memory and run more queries against the database
|
||||||
|
$members = Member::get()->chunkedFetch();
|
||||||
|
foreach ($members as $member) {
|
||||||
|
echo $member->Email;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`chunkedFetch()` will respect any filter or sort condition applied to the DataList. By default, `chunkedFetch()` will limit each query to 1000 results. You can explicitly set this limit by passing an integer to `chunkedFetch()`.
|
||||||
|
|
||||||
|
```php
|
||||||
|
$members = Member::get()
|
||||||
|
->filter('Email:PartialMatch', 'silverstripe.com')
|
||||||
|
->sort('Email')
|
||||||
|
->chunkedFetch(10);
|
||||||
|
foreach ($members as $member) {
|
||||||
|
echo $member->Email;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
There are some limitations:
|
||||||
|
* `chunkedFetch()` will ignore any limit or offset you have applied to your DataList
|
||||||
|
* you can not "count" a chunked list or do any other call against it aside from iterating it
|
||||||
|
* while iterating over a chunked list, you can not perform any operation that would alter the order of the items.
|
||||||
|
|
||||||
## ArrayList
|
## ArrayList
|
||||||
|
|
||||||
[ArrayList](api:SilverStripe\ORM\ArrayList) exists to wrap a standard PHP array in the same API as a database backed list.
|
[ArrayList](api:SilverStripe\ORM\ArrayList) exists to wrap a standard PHP array in the same API as a database backed list.
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
## New features
|
## New features
|
||||||
|
|
||||||
|
* [Added a `chunkedFetch()` method to `DataList`](/Developer_Guides/Model/Lists#chunkedFetch) to avoid loading large result sets in memory all at once.
|
||||||
|
|
||||||
### Support for silverstripe/graphql v4 {#graphql-v4}
|
### Support for silverstripe/graphql v4 {#graphql-v4}
|
||||||
|
|
||||||
The [silverstripe/graphql](http://github.com/silverstripe/silverstripe-graphql/issues) module
|
The [silverstripe/graphql](http://github.com/silverstripe/silverstripe-graphql/issues) module
|
||||||
|
@ -1286,4 +1286,44 @@ class DataList extends ViewableData implements SS_List, Filterable, Sortable, Li
|
|||||||
{
|
{
|
||||||
throw new \BadMethodCallException("Can't alter items in a DataList using array-access");
|
throw new \BadMethodCallException("Can't alter items in a DataList using array-access");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Iterate over this DataList in "chunks". The query is broken into smaller subsets so the entire result set is
 * never loaded in memory at once. Beware not to perform any operation on the results that might alter the
 * return order while iterating; otherwise subsequent chunks may skip or repeat records.
 *
 * Each chunk is fetched by applying its own limit and offset to this list, so a custom limit or offset cannot
 * be combined with this method.
 *
 * Because this method is a generator, nothing runs (including the `$chunkSize` validation) until the caller
 * starts iterating over the returned value.
 *
 * @param int $chunkSize Maximum number of records fetched per query. Must be at least 1.
 * @throws InvalidArgumentException If `$chunkSize` is smaller than 1.
 * @return Generator|DataObject[]
 */
public function chunkedFetch(int $chunkSize = 1000): iterable
{
    if ($chunkSize < 1) {
        // __METHOD__ already contains the class name, so it is not prefixed with __CLASS__ again
        throw new InvalidArgumentException(sprintf(
            '%s: chunkSize must be greater than or equal to 1',
            __METHOD__
        ));
    }

    $currentChunk = 0;

    // Keep looping until we run out of chunks; the only exit is the short-chunk check below
    while (true) {
        $chunk = $this->limit($chunkSize, $chunkSize * $currentChunk)->getIterator();

        // Count the items as they are yielded rather than relying on the iterator being countable
        $fetched = 0;
        foreach ($chunk as $item) {
            $fetched++;
            yield $item;
        }

        if ($fetched < $chunkSize) {
            // If our last chunk had fewer items than our chunk size, we've reached the end.
            break;
        }

        $currentChunk++;
    }
}
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,11 @@ use SilverStripe\ORM\DataQuery;
|
|||||||
use SilverStripe\ORM\DB;
|
use SilverStripe\ORM\DB;
|
||||||
use SilverStripe\ORM\Filterable;
|
use SilverStripe\ORM\Filterable;
|
||||||
use SilverStripe\ORM\Filters\ExactMatchFilter;
|
use SilverStripe\ORM\Filters\ExactMatchFilter;
|
||||||
|
use SilverStripe\ORM\Tests\DataObjectTest\DataListQueryCounter;
|
||||||
|
use SilverStripe\ORM\Tests\DataObjectTest\Fixture;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Bracket;
|
use SilverStripe\ORM\Tests\DataObjectTest\Bracket;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\EquipmentCompany;
|
use SilverStripe\ORM\Tests\DataObjectTest\EquipmentCompany;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Fan;
|
use SilverStripe\ORM\Tests\DataObjectTest\Fan;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Fixture;
|
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Player;
|
use SilverStripe\ORM\Tests\DataObjectTest\Player;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Sortable;
|
use SilverStripe\ORM\Tests\DataObjectTest\Sortable;
|
||||||
use SilverStripe\ORM\Tests\DataObjectTest\Staff;
|
use SilverStripe\ORM\Tests\DataObjectTest\Staff;
|
||||||
@ -1875,4 +1876,122 @@ class DataListTest extends SapphireTest
|
|||||||
'Product B',
|
'Product B',
|
||||||
], $productTitles);
|
], $productTitles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * chunkedFetch() must return the same records, in the same order, as the regular iterator, and must run the
 * expected number of queries for a range of chunk sizes around the size of the result set.
 */
public function testChunkedFetch()
{
    $expectedIDs = Team::get()->map('ID', 'ID')->toArray();
    $expectedSize = count($expectedIDs);

    // Default chunk size: expected to cover the whole result set with a single query
    $counter = new DataListQueryCounter(Team::class);
    $this->chunkTester(
        $expectedIDs,
        Team::get()->setDataQuery($counter)->chunkedFetch(),
        $counter,
        1
    );

    // Each scenario is [chunk size, expected number of queries]
    $scenarios = [
        [1, $expectedSize + 1],
        [$expectedSize, 2],
        [$expectedSize - 1, 2],
        [$expectedSize + 1, 1],
    ];

    foreach ($scenarios as [$chunkSize, $expectedQueryCount]) {
        $counter = new DataListQueryCounter(Team::class);
        $this->chunkTester(
            $expectedIDs,
            Team::get()->setDataQuery($counter)->chunkedFetch($chunkSize),
            $counter,
            $expectedQueryCount
        );
    }
}
|
||||||
|
|
||||||
|
/**
 * A filter applied to the list before chunkedFetch() must be respected by the chunked iteration.
 */
public function testFilteredChunk()
{
    $counter = new DataListQueryCounter(Team::class);

    // Reference result from the regular (non-chunked) list
    $expectedIDs = Team::get()->filter('ClassName', Team::class)->map('ID', 'ID')->toArray();
    $chunked = Team::get()->setDataQuery($counter)->filter('ClassName', Team::class)->chunkedFetch();

    $this->chunkTester($expectedIDs, $chunked, $counter, 1);
}
|
||||||
|
|
||||||
|
/**
 * A sort applied to the list before chunkedFetch() must be respected by the chunked iteration.
 */
public function testSortedChunk()
{
    $counter = new DataListQueryCounter(Team::class);

    // Reference result from the regular (non-chunked) list
    $expectedIDs = Team::get()->sort('ID', 'Desc')->map('ID', 'ID')->toArray();
    $chunked = Team::get()->setDataQuery($counter)->sort('ID', 'Desc')->chunkedFetch();

    $this->chunkTester($expectedIDs, $chunked, $counter, 1);
}
|
||||||
|
|
||||||
|
/**
 * Chunking a list whose filter is expected to match nothing must yield no items and run one query.
 */
public function testEmptyChunk()
{
    $counter = new DataListQueryCounter(Team::class);
    $chunked = Team::get()->setDataQuery($counter)->filter('ClassName', 'non-sense')->chunkedFetch();

    $this->chunkTester([], $chunked, $counter, 1);
}
|
||||||
|
|
||||||
|
/**
 * A chunk size below 1 must be rejected with an InvalidArgumentException.
 */
public function testInvalidChunkSize()
{
    $this->expectException(InvalidArgumentException::class);

    $chunked = Team::get()->chunkedFetch(0);

    // You don't get the error until you iterate over the list
    foreach ($chunked as $unused) {
    }
}
|
||||||
|
|
||||||
|
/**
 * Loop over a chunked list and make sure it matches our expected results, then check how many queries were
 * recorded by the query counter.
 *
 * @param int[] $expectedIDs IDs expected from the chunked list, in the expected order
 * @param iterable $chunkList Chunked list under test
 * @param DataListQueryCounter $dataQuery Counter attached to the list that produced `$chunkList`
 * @param int $expectedQueryCount Number of queries the iteration is expected to have recorded
 */
private function chunkTester(
    array $expectedIDs,
    iterable $chunkList,
    DataListQueryCounter $dataQuery,
    int $expectedQueryCount
) {
    // IDs are consumed from the front as matching items come out of the chunked list
    $remainingIDs = $expectedIDs;

    foreach ($chunkList as $team) {
        $this->assertInstanceOf(
            Team::class,
            $team,
            'Chunk return the correct type of data object'
        );

        $nextID = array_shift($remainingIDs);
        $this->assertEquals(
            $nextID,
            $team->ID,
            'chunk returns the same results in the same order as the regular iterator'
        );
    }

    // Anything left over means the chunked list stopped early
    $this->assertEmpty($remainingIDs, 'chunk returns all the results that the regular iterator does');
    $this->assertEquals($expectedQueryCount, $dataQuery->getCount());
}
|
||||||
}
|
}
|
||||||
|
42
tests/php/ORM/DataObjectTest/DataListQueryCounter.php
Normal file
42
tests/php/ORM/DataObjectTest/DataListQueryCounter.php
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace SilverStripe\ORM\Tests\DataObjectTest;
|
||||||
|
|
||||||
|
use SilverStripe\ORM\DataQuery;
|
||||||
|
|
||||||
|
/**
 * DataQuery that counts how many times a finalised query is requested. It is designed around the chunk method
 * so tests can check the number of queries run.
 */
class DataListQueryCounter extends DataQuery
{
    /**
     * Number of finalised queries recorded. Only the value stored on the original instance is used.
     * @var int
     */
    private $queryCount = 0;

    /**
     * Reference to the instance created by the constructor. When the DataList gets cloned, this reference is
     * carried over to the cloned DataListQueryCounter, so every clone points back to the original instance
     * and all clones share a single counter.
     * @var DataListQueryCounter
     */
    private $parent;

    /**
     * @param string $dataClass Class name passed straight through to the DataQuery constructor
     */
    public function __construct($dataClass)
    {
        parent::__construct($dataClass);

        // The original instance is its own parent
        $this->parent = $this;
    }

    /**
     * Record one more query on the original instance, then defer to the parent implementation.
     *
     * @param mixed $queriedColumns Passed through unchanged to DataQuery::getFinalisedQuery()
     * @return mixed Whatever DataQuery::getFinalisedQuery() returns
     */
    public function getFinalisedQuery($queriedColumns = null)
    {
        $this->parent->queryCount++;

        return parent::getFinalisedQuery($queriedColumns);
    }

    /**
     * Number of queries recorded on the original instance, whichever clone this is called on.
     *
     * @return int
     */
    public function getCount()
    {
        return $this->parent->queryCount;
    }
}
|
Loading…
Reference in New Issue
Block a user