diff --git a/docs/en/02_Developer_Guides/00_Model/03_Lists.md b/docs/en/02_Developer_Guides/00_Model/03_Lists.md index d953dd7e1..bb4161b4a 100644 --- a/docs/en/02_Developer_Guides/00_Model/03_Lists.md +++ b/docs/en/02_Developer_Guides/00_Model/03_Lists.md @@ -10,7 +10,7 @@ Whenever using the ORM to fetch records or navigate relationships you will recei either [DataList](api:SilverStripe\ORM\DataList) or [RelationList](api:SilverStripe\ORM\RelationList). This object gives you the ability to iterate over each of the results or modify. -## Iterating over the list. +## Iterating over the list [SS_List](api:SilverStripe\ORM\SS_List) implements `IteratorAggregate`, allowing you to loop over the instance. @@ -32,7 +32,7 @@ Or in the template engine: <% end_loop %> ``` -## Finding an item by value. +## Finding an item by value ```php // $list->find($key, $value); @@ -79,6 +79,42 @@ echo $members->column('Email'); // ]; ``` +## Iterating over a large list {#chunkedFetch} + +When iterating over a DataList, all DataObjects in the list will be loaded in memory. This can consume a lot of memory when working with a large data set. + +To limit the number of DataObjects loaded in memory, you can use the `chunkedFetch()` method on your DataList. In most cases, you can iterate over the results of `chunkedFetch()` the same way you would iterate over your DataList. Internally, `chunkedFetch()` will split your DataList query into smaller queries and keep running through them until it runs out of results. + +```php +$members = Member::get(); +foreach ($members as $member) { + echo $member->Email; +} + +// This call will produce the same output, but it will use less memory and run more queries against the database +$members = Member::get()->chunkedFetch(); +foreach ($members as $member) { + echo $member->Email; +} +``` + +`chunkedFetch()` will respect any filter or sort condition applied to the DataList. By default, `chunkedFetch()` will limit each query to 1000 results. 
/**
 * Iterate over this DataList in "chunks". This breaks the query into smaller subsets and avoids loading the
 * entire result set in memory at once. Beware not to perform any operation on the results that might alter
 * the return order while iterating, otherwise subsequent chunks may skip or repeat records.
 *
 * Each chunk is fetched with its own `limit()` call, so you can not combine a custom limit or offset with
 * this method.
 *
 * This method is a generator: nothing is validated or queried until the result is iterated.
 *
 * @param int $chunkSize Maximum number of records fetched by each query. Must be at least 1.
 * @throws InvalidArgumentException If `$chunkSize` is lower than 1 (raised when iteration starts).
 * @return Generator|DataObject[]
 */
public function chunkedFetch(int $chunkSize = 1000): iterable
{
    if ($chunkSize < 1) {
        // __FUNCTION__ is used instead of __METHOD__ because __METHOD__ already includes the class name,
        // which would otherwise appear twice in the message.
        throw new InvalidArgumentException(sprintf(
            '%s::%s: chunkSize must be greater than or equal to 1',
            __CLASS__,
            __FUNCTION__
        ));
    }

    $offset = 0;

    // Keep fetching chunks until one comes back with fewer records than requested
    do {
        $chunk = $this->limit($chunkSize, $offset)->getIterator();

        // Count what we actually hand out rather than relying on the iterator exposing count()
        $fetched = 0;
        foreach ($chunk as $item) {
            $fetched++;
            yield $item;
        }

        $offset += $chunkSize;

        // A chunk smaller than $chunkSize means we have reached the end of the result set
    } while ($fetched >= $chunkSize);
}
/**
 * A filtered list must yield the same records when chunked, using a single query at the default chunk size.
 */
public function testFilteredChunk()
{
    $dataQuery = new DataListQueryCounter(Team::class);
    $this->chunkTester(
        Team::get()->filter('ClassName', Team::class)->map('ID', 'ID')->toArray(),
        Team::get()->setDataQuery($dataQuery)->filter('ClassName', Team::class)->chunkedFetch(),
        $dataQuery,
        1
    );
}

/**
 * A sorted list must keep its order when chunked, using a single query at the default chunk size.
 */
public function testSortedChunk()
{
    $dataQuery = new DataListQueryCounter(Team::class);
    $this->chunkTester(
        Team::get()->sort('ID', 'Desc')->map('ID', 'ID')->toArray(),
        Team::get()->setDataQuery($dataQuery)->sort('ID', 'Desc')->chunkedFetch(),
        $dataQuery,
        1
    );
}

/**
 * A list without any matching record must yield nothing and still only run one query.
 */
public function testEmptyChunk()
{
    $dataQuery = new DataListQueryCounter(Team::class);
    $this->chunkTester(
        [],
        Team::get()->setDataQuery($dataQuery)->filter('ClassName', 'non-sense')->chunkedFetch(),
        $dataQuery,
        1
    );
}

/**
 * A chunk size lower than 1 must be rejected.
 */
public function testInvalidChunkSize()
{
    $this->expectException(InvalidArgumentException::class);
    foreach (Team::get()->chunkedFetch(0) as $item) {
        // You don't get the error until you iterate over the list
    }
}
/**
 * Walk over a chunked list and make sure it yields exactly the expected records, in the expected order,
 * and that the query counter reports the expected number of queries.
 *
 * @param int[] $expectedIDs IDs the chunked list should yield, in order
 * @param iterable $chunkList Chunked list under test
 * @param DataListQueryCounter $dataQuery Query counter attached to the list under test
 * @param int $expectedQueryCount Number of queries the iteration is expected to run
 */
private function chunkTester(
    array $expectedIDs,
    iterable $chunkList,
    DataListQueryCounter $dataQuery,
    int $expectedQueryCount
) {
    // IDs we have not seen yet; each yielded record consumes the one at the front
    $remainingIDs = $expectedIDs;

    foreach ($chunkList as $team) {
        $this->assertInstanceOf(Team::class, $team, 'Chunk return the correct type of data object');

        $nextID = array_shift($remainingIDs);
        $this->assertEquals(
            $nextID,
            $team->ID,
            'chunk returns the same results in the same order as the regular iterator'
        );
    }

    $this->assertEmpty($remainingIDs, 'chunk returns all the results that the regular iterator does');

    $actualQueryCount = $dataQuery->getCount();
    $this->assertEquals($expectedQueryCount, $actualQueryCount);
}