API Better implementation of caching / rate limiting

This commit is contained in:
Damian Mooyman 2014-04-17 17:44:38 +12:00
parent 4946376793
commit a243a3510a
10 changed files with 348 additions and 32 deletions

View File

@ -1,7 +1,4 @@
<?php
SiteTree::add_extension('VersionFeed');
ContentController::add_extension('VersionFeed_Controller');
// Set the cache lifetime to 5 mins.
SS_Cache::set_cache_lifetime('VersionFeed_Controller', 5*60);

18
_config/versionfeed.yml Normal file
View File

@ -0,0 +1,18 @@
---
Name: versionedfeedconfig
---
Injector:
RateLimitFilter: RateLimitFilter
ContentFilter:
class: CachedContentFilter
constructor:
- %$RateLimitFilter
SiteTree:
extensions:
- VersionFeed
ContentController:
extensions:
- VersionFeed_Controller
VersionFeed_Controller:
dependencies:
ContentFilter: %$ContentFilter

View File

@ -3,7 +3,7 @@
class VersionFeed extends SiteTreeExtension {
private static $db = array(
'PublicHistory' => 'Boolean'
'PublicHistory' => 'Boolean(true)'
);
private static $defaults = array(

View File

@ -6,8 +6,39 @@ class VersionFeed_Controller extends Extension {
'changes',
'allchanges'
);
/**
* Content handler
*
* @var ContentFilter
*/
protected $contentFilter;
/**
* Sets the content filter
*
* @param ContentFilter $contentFilter
*/
public function setContentFilter(ContentFilter $contentFilter) {
$this->contentFilter = $contentFilter;
}
/**
* Evaluates the result of the given callback
*
* @param string $key Unique key for this
* @param callable $callback Callback for evaluating the content
* @return mixed Result of $callback()
*/
protected function filterContent($key, $callback) {
if($this->contentFilter) {
return $this->contentFilter->getContent($key, $callback);
} else {
return call_user_func($callback);
}
}
function onAfterInit() {
public function onAfterInit() {
// RSS feed for per-page changes.
if ($this->owner->PublicHistory) {
RSSFeed::linkToFeed($this->owner->Link() . 'changes',
@ -26,18 +57,15 @@ class VersionFeed_Controller extends Extension {
/**
* Get page-specific changes in a RSS feed.
*/
function changes() {
if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404);;
public function changes() {
if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404);
// Cache the diffs to remove DOS possibility.
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$target = $this->owner;
$key = implode('_', array('changes', $this->owner->ID, $this->owner->Version));
$entries = $cache->load($key);
if(!$entries || isset($_GET['flush'])) {
$entries = $this->owner->getDiffedChanges();
$cache->save($entries, $key);
}
$entries = $this->filterContent($key, function() use ($target) {
return $target->getDiffedChanges();
});
// Generate the output.
$title = sprintf(_t('RSSHistory.SINGLEPAGEFEEDTITLE', 'Updates to %s page'), $this->owner->Title);
@ -49,42 +77,47 @@ class VersionFeed_Controller extends Extension {
/**
* Get all changes from the site in a RSS feed.
*/
function allchanges() {
public function allchanges() {
$latestChanges = DB::query('SELECT * FROM "SiteTree_versions" WHERE "WasPublished"=\'1\' AND "CanViewType" IN (\'Anyone\', \'Inherit\') AND "ShowInSearch"=1 AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\') ORDER BY "LastEdited" DESC LIMIT 20');
$latestChanges = DB::query('
SELECT * FROM "SiteTree_versions"
WHERE "WasPublished" = \'1\'
AND "CanViewType" IN (\'Anyone\', \'Inherit\')
AND "ShowInSearch" = 1
AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\')
ORDER BY "LastEdited" DESC LIMIT 20'
);
$lastChange = $latestChanges->record();
$latestChanges->rewind();
if ($lastChange) {
// Cache the diffs to remove DOS possibility.
$member = Member::currentUser();
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$key = 'allchanges' . preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited']) .
($member ? $member->ID : 'public');
$changeList = $cache->load($key);
if(!$changeList || isset($_GET['flush'])) {
$key = 'allchanges'
. preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited'])
. (Member::currentUserID() ?: 'public');
$changeList = $this->filterContent($key, function() use ($latestChanges) {
$changeList = new ArrayList();
$canView = array();
foreach ($latestChanges as $record) {
// Check if the page should be visible.
// WARNING: although we are providing historical details, we check the current configuration.
$page = SiteTree::get()->filter(array('ID'=>$record['RecordID']))->First();
if (!$page->canView(new Member())) continue;
$id = $record['RecordID'];
if(!isset($canView[$id])) {
$page = SiteTree::get()->byID($id);
$canView[$id] = $page && $page->canView(new Member());
}
if (!$canView[$id]) continue;
// Get the diff to the previous version.
$version = new Versioned_Version($record);
$changes = $version->getDiffedChanges($version->Version, false);
if ($changes && $changes->Count()) $changeList->push($changes->First());
}
$cache->save($changeList, $key);
}
return $changeList;
});
} else {
$changeList = new ArrayList();
}

View File

@ -0,0 +1,22 @@
<?php
/**
* Caches results of a callback
*/
class CachedContentFilter extends ContentFilter {
public function getContent($key, $callback) {
$cache = $this->getCache();
// Return cached value if available
$result = isset($_GET['flush'])
? null
: $cache->load($key);
if($result) return $result;
// Fallback to generate result
$result = parent::getContent($key, $callback);
$cache->save($result, $key);
return $result;
}
}

View File

@ -0,0 +1,45 @@
<?php
/**
* Conditionally executes a given callback, attempting to return the desired results
* of its execution.
*/
abstract class ContentFilter {
/**
* Nested content filter
*
* @var ContentFilter
*/
protected $nestedContentFilter;
public function __construct($nestedContentFilter = null) {
$this->nestedContentFilter = $nestedContentFilter;
}
/**
* Gets the cache to use
*
* @return Zend_Cache_Frontend
*/
protected function getCache() {
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
return $cache;
}
/**
* Evaluates the result of the given callback
*
* @param string $key Unique key for this
* @param callable $callback Callback for evaluating the content
* @return mixed Result of $callback()
*/
public function getContent($key, $callback) {
if($this->nestedContentFilter) {
return $this->nestedContentFilter->getContent($key, $callback);
} else {
return call_user_func($callback);
}
}
}

View File

@ -0,0 +1,92 @@
<?php
/**
* Provides rate limiting of execution of a callback
*/
class RateLimitFilter extends ContentFilter {
/**
* Time duration (in second) to allow for generation of cached results. Requests to
* pages that within this time period that do not hit the cache (and would otherwise trigger
* a version query) will be presented with a 429 (rate limit) HTTP error
*
* @config
* @var int
*/
private static $lock_timeout = 10;
/**
* Determine if the cache generation should be locked on a per-page basis. If true, concurrent page versions
* may be generated without rate interference.
*
* Suggested to turn this to false on small sites that will not have many concurrent views of page versions
*
* @config
* @var bool
*/
private static $lock_bypage = true;
/**
* Determine if rate limiting should be applied independently to each IP address. This method is not
* reliable, as most DDoS attacks use multiple IP addresses.
*
* @config
* @var bool
*/
private static $lock_byuserip = false;
/**
* Cache key prefix
*/
const CACHE_PREFIX = 'RateLimitBegin';
/**
* Determines the key to use for saving the current rate
*
* @param string $itemkey Input key
* @return string Result key
*/
protected function getCacheKey($itemkey) {
$key = self::CACHE_PREFIX;
// Add global identifier
if(Config::inst()->get(get_class(), 'lock_bypage')) {
$key .= '_' . md5($itemkey);
}
// Add user-specific identifier
if(Config::inst()->get(get_class(), 'lock_byuserip') && Controller::has_curr()) {
$ip = Controller::curr()->getRequest()->getIP();
$key .= '_' . md5($ip);
}
return $key;
}
public function getContent($key, $callback) {
// Bypass rate limiting if flushing, or timeout isn't set
$timeout = Config::inst()->get(get_class(), 'lock_timeout');
if(isset($_GET['flush']) || !$timeout) {
return parent::getContent($key, $callback);
}
// Generate result with rate limiting enabled
$limitKey = $this->getCacheKey($key);
$cache = $this->getCache();
if($cacheBegin = $cache->load($limitKey)) {
if(time() - $cacheBegin < $timeout) {
// Politely inform visitor of limit
$response = new SS_HTTPResponse_Exception('Too Many Requests.', 429);
$response->getResponse()->addHeader('Retry-After', 1 + time() - $cacheBegin);
throw $response;
}
}
// Generate result with rate limit locked
$cache->save(time(), $limitKey);
$result = parent::getContent($key, $callback);
$cache->remove($limitKey);
return $result;
}
}

View File

@ -0,0 +1,7 @@
# 1.0.3
## Upgrading
* Caching and rate limiting rules now apply in this version, and it may be necessary to adjust the values
for `RateLimitFilter.lock_timeout`, `RateLimitFilter.lock_bypage` and `RateLimitFilter.lock_byuserip` configs.
See the [Developer Documentation](../developer.md) for information on how these affect cache performance and behaviour.

View File

@ -21,3 +21,35 @@ and returning the URL of your desired RSS feed:
}
This can be used in templates as `$DefaultRSSLink`.
### Rate limiting and caching
By default all content is filtered based on the rules specified in `versionfeed/_config/versionfeed.yml`.
Two filters are applied on top of one another:
* `CachedContentFilter` provides caching of versions based on an identifier built up of the record ID and the
most recently saved version number. There is no configuration required for this class.
* `RateLimitFilter` provides rate limiting to ensure that requests to uncached data does not overload the
server. This filter will only be applied if the `CachedContentFilter` does not have any cached record
for a request.
Either one of these can be replaced, added to, or removed, by adjusting the `VersionFeed_Controller.dependencies`
config to point to a replacement (or no) filter.
For large servers with a high level of traffic (more than 1 visits every 10 seconds) the default settings should
be sufficient.
For smaller servers where it's reasonable to apply a more strict approach to rate limiting, consider setting the
`RateLimitFilter.lock_bypage` config to false, meaning that a single limit will be applied to all URLs.
If left on true, then each URL will have its own rate limit, and on smaller servers with lots of
concurrent requests this can still overwhelm capacity. This will also leave smaller servers vulnerable to DDoS
attacks which target many URLs simultaneously. This config will have no effect on the `allchanges` method.
`RateLimitFilter.lock_byuserip` can also be set to true in order to prevent requests from different users
interfering with one another. However, this can provide an ineffective safeguard against malicious DDoS attacks
which use multiple IP addresses.
Another important variable is the `RateLimitFilter.lock_timeout` config, which is set to 10 seconds by default.
This should be increased on sites which may be slow to generate page versions, whether due to lower
server capacity or volume of content (number of page versions). Requests to this page after the timeout
will not trigger any rate limit safeguard, so you should be sure that this is set to an appropriate level.

View File

@ -5,12 +5,29 @@ class VersionFeedFunctionalTest extends FunctionalTest {
'Page' => array('VersionFeed'),
'Page_Controller' => array('VersionFeed_Controller'),
);
protected $userIP;
public function setUp() {
parent::setUp();
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->clean(Zend_Cache::CLEANING_MODE_ALL);
$this->userIP = isset($_SERVER['HTTP_CLIENT_IP']) ? $_SERVER['HTTP_CLIENT_IP'] : null;
Config::nest();
Config::inst()->update('RateLimitFilter', 'lock_timeout', 20);
Config::inst()->update('RateLimitFilter', 'lock_bypage', false);
Config::inst()->update('RateLimitFilter', 'lock_byuserip', false);
}
public function tearDown() {
Config::unnest();
$_SERVER['HTTP_CLIENT_IP'] = $this->userIP;
parent::tearDown();
}
public function testPublicHistory() {
@ -36,6 +53,59 @@ class VersionFeedFunctionalTest extends FunctionalTest {
$xml = simplexml_load_string($response->getBody());
$this->assertTrue((bool)$xml->channel->item);
}
public function testRateLimiting() {
$page1 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page1'));
$page2 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page2'));
// Artifically set cache lock
Config::inst()->update('RateLimitFilter', 'lock_byuserip', false);
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX);
// Test normal hit
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
$response = $this->get($page2->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
// Test page specific lock
Config::inst()->update('RateLimitFilter', 'lock_bypage', true);
$key = implode('_', array(
'changes',
$page1->ID,
Versioned::get_versionnumber_by_stage('SiteTree', 'Live', $page1->ID, false)
));
$key = RateLimitFilter::CACHE_PREFIX . '_' . md5($key);
$cache->save(time() - 2, $key);
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
$response = $this->get($page2->RelativeLink('changes'));
$this->assertEquals(200, $response->getStatusCode());
Config::inst()->update('RateLimitFilter', 'lock_bypage', false);
// Test rate limit hit by IP
Config::inst()->update('RateLimitFilter', 'lock_byuserip', true);
$_SERVER['HTTP_CLIENT_IP'] = '127.0.0.1';
$cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1'));
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
// Test rate limit doesn't hit other IP
$_SERVER['HTTP_CLIENT_IP'] = '127.0.0.20';
$cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1'));
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(200, $response->getStatusCode());
// Restore setting
Config::inst()->update('RateLimitFilter', 'lock_byuserip', false);
}
public function testContainsChangesForPageOnly() {
$page1 = $this->createPageWithChanges(array('Title' => 'Page1'));