Merge pull request #5 from tractorcow/pulls/3.1-rate-limiting

API Better implementation of caching / rate limiting
This commit is contained in:
Mateusz U 2014-04-30 15:47:10 +12:00
commit 706ad85054
10 changed files with 371 additions and 32 deletions

View File

@ -1,7 +1,4 @@
<?php
SiteTree::add_extension('VersionFeed');
ContentController::add_extension('VersionFeed_Controller');
// Set the cache lifetime to 5 mins.
SS_Cache::set_cache_lifetime('VersionFeed_Controller', 5*60);

18
_config/versionfeed.yml Normal file
View File

@ -0,0 +1,18 @@
---
Name: versionedfeedconfig
---
Injector:
RateLimitFilter: \VersionFeed\Filters\RateLimitFilter
ContentFilter:
class: \VersionFeed\Filters\CachedContentFilter
constructor:
- %$RateLimitFilter
SiteTree:
extensions:
- VersionFeed
ContentController:
extensions:
- VersionFeed_Controller
VersionFeed_Controller:
dependencies:
ContentFilter: %$ContentFilter

View File

@ -3,7 +3,7 @@
class VersionFeed extends SiteTreeExtension {
private static $db = array(
'PublicHistory' => 'Boolean'
'PublicHistory' => 'Boolean(true)'
);
private static $defaults = array(

View File

@ -6,8 +6,39 @@ class VersionFeed_Controller extends Extension {
'changes',
'allchanges'
);
/**
* Content handler
*
* @var \VersionFeed\Filters\ContentFilter
*/
protected $contentFilter;
/**
* Sets the content filter
*
* @param \VersionFeed\Filters\ContentFilter $contentFilter
*/
public function setContentFilter(\VersionFeed\Filters\ContentFilter $contentFilter) {
$this->contentFilter = $contentFilter;
}
/**
* Evaluates the result of the given callback
*
* @param string $key Unique key for this
* @param callable $callback Callback for evaluating the content
* @return mixed Result of $callback()
*/
protected function filterContent($key, $callback) {
if($this->contentFilter) {
return $this->contentFilter->getContent($key, $callback);
} else {
return call_user_func($callback);
}
}
function onAfterInit() {
public function onAfterInit() {
// RSS feed for per-page changes.
if ($this->owner->PublicHistory) {
RSSFeed::linkToFeed($this->owner->Link() . 'changes',
@ -26,18 +57,15 @@ class VersionFeed_Controller extends Extension {
/**
* Get page-specific changes in a RSS feed.
*/
function changes() {
if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404);;
public function changes() {
if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404);
// Cache the diffs to remove DOS possibility.
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$target = $this->owner;
$key = implode('_', array('changes', $this->owner->ID, $this->owner->Version));
$entries = $cache->load($key);
if(!$entries || isset($_GET['flush'])) {
$entries = $this->owner->getDiffedChanges();
$cache->save($entries, $key);
}
$entries = $this->filterContent($key, function() use ($target) {
return $target->getDiffedChanges();
});
// Generate the output.
$title = sprintf(_t('RSSHistory.SINGLEPAGEFEEDTITLE', 'Updates to %s page'), $this->owner->Title);
@ -49,42 +77,47 @@ class VersionFeed_Controller extends Extension {
/**
* Get all changes from the site in a RSS feed.
*/
function allchanges() {
public function allchanges() {
$latestChanges = DB::query('SELECT * FROM "SiteTree_versions" WHERE "WasPublished"=\'1\' AND "CanViewType" IN (\'Anyone\', \'Inherit\') AND "ShowInSearch"=1 AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\') ORDER BY "LastEdited" DESC LIMIT 20');
$latestChanges = DB::query('
SELECT * FROM "SiteTree_versions"
WHERE "WasPublished" = \'1\'
AND "CanViewType" IN (\'Anyone\', \'Inherit\')
AND "ShowInSearch" = 1
AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\')
ORDER BY "LastEdited" DESC LIMIT 20'
);
$lastChange = $latestChanges->record();
$latestChanges->rewind();
if ($lastChange) {
// Cache the diffs to remove DOS possibility.
$member = Member::currentUser();
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$key = 'allchanges' . preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited']) .
($member ? $member->ID : 'public');
$changeList = $cache->load($key);
if(!$changeList || isset($_GET['flush'])) {
$key = 'allchanges'
. preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited'])
. (Member::currentUserID() ?: 'public');
$changeList = $this->filterContent($key, function() use ($latestChanges) {
$changeList = new ArrayList();
$canView = array();
foreach ($latestChanges as $record) {
// Check if the page should be visible.
// WARNING: although we are providing historical details, we check the current configuration.
$page = SiteTree::get()->filter(array('ID'=>$record['RecordID']))->First();
if (!$page->canView(new Member())) continue;
$id = $record['RecordID'];
if(!isset($canView[$id])) {
$page = SiteTree::get()->byID($id);
$canView[$id] = $page && $page->canView(new Member());
}
if (!$canView[$id]) continue;
// Get the diff to the previous version.
$version = new Versioned_Version($record);
$changes = $version->getDiffedChanges($version->Version, false);
if ($changes && $changes->Count()) $changeList->push($changes->First());
}
$cache->save($changeList, $key);
}
return $changeList;
});
} else {
$changeList = new ArrayList();
}

View File

@ -0,0 +1,24 @@
<?php
namespace VersionFeed\Filters;
/**
* Caches results of a callback
*/
class CachedContentFilter extends ContentFilter {
public function getContent($key, $callback) {
$cache = $this->getCache();
// Return cached value if available
$result = isset($_GET['flush'])
? null
: $cache->load($key);
if($result) return $result;
// Fallback to generate result
$result = parent::getContent($key, $callback);
$cache->save($result, $key);
return $result;
}
}

View File

@ -0,0 +1,47 @@
<?php
namespace VersionFeed\Filters;
/**
* Conditionally executes a given callback, attempting to return the desired results
* of its execution.
*/
abstract class ContentFilter {
/**
* Nested content filter
*
* @var ContentFilter
*/
protected $nestedContentFilter;
public function __construct($nestedContentFilter = null) {
$this->nestedContentFilter = $nestedContentFilter;
}
/**
* Gets the cache to use
*
* @return Zend_Cache_Frontend
*/
protected function getCache() {
$cache = \SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
return $cache;
}
/**
* Evaluates the result of the given callback
*
* @param string $key Unique key for this
* @param callable $callback Callback for evaluating the content
* @return mixed Result of $callback()
*/
public function getContent($key, $callback) {
if($this->nestedContentFilter) {
return $this->nestedContentFilter->getContent($key, $callback);
} else {
return call_user_func($callback);
}
}
}

View File

@ -0,0 +1,112 @@
<?php
namespace VersionFeed\Filters;
/**
* Provides rate limiting of execution of a callback
*/
class RateLimitFilter extends ContentFilter {
/**
* Time duration (in second) to allow for generation of cached results. Requests to
* pages that within this time period that do not hit the cache (and would otherwise trigger
* a version query) will be presented with a 429 (rate limit) HTTP error
*
* @config
* @var int
*/
private static $lock_timeout = 5;
/**
* Determine if the cache generation should be locked on a per-page basis. If true, concurrent page versions
* may be generated without rate interference.
*
* @config
* @var bool
*/
private static $lock_bypage = false;
/**
* Determine if rate limiting should be applied independently to each IP address. This method is not
* reliable, as most DDoS attacks use multiple IP addresses.
*
* @config
* @var bool
*/
private static $lock_byuserip = false;
/**
* Time duration (in sections) to deny further search requests after a successful search.
* Search requests within this time period while another query is in progress will be
* presented with a 429 (rate limit)
*
* @config
* @var int
*/
private static $lock_cooldown = 2;
/**
* Cache key prefix
*/
const CACHE_PREFIX = 'RateLimitBegin';
/**
* Determines the key to use for saving the current rate
*
* @param string $itemkey Input key
* @return string Result key
*/
protected function getCacheKey($itemkey) {
$key = self::CACHE_PREFIX;
// Add global identifier
if(\Config::inst()->get(get_class(), 'lock_bypage')) {
$key .= '_' . md5($itemkey);
}
// Add user-specific identifier
if(\Config::inst()->get(get_class(), 'lock_byuserip') && \Controller::has_curr()) {
$ip = \Controller::curr()->getRequest()->getIP();
$key .= '_' . md5($ip);
}
return $key;
}
public function getContent($key, $callback) {
// Bypass rate limiting if flushing, or timeout isn't set
$timeout = \Config::inst()->get(get_class(), 'lock_timeout');
if(isset($_GET['flush']) || !$timeout) {
return parent::getContent($key, $callback);
}
// Generate result with rate limiting enabled
$limitKey = $this->getCacheKey($key);
$cache = $this->getCache();
if($lockedUntil = $cache->load($limitKey)) {
if(time() < $lockedUntil) {
// Politely inform visitor of limit
$response = new \SS_HTTPResponse_Exception('Too Many Requests.', 429);
$response->getResponse()->addHeader('Retry-After', 1 + $lockedUntil - time());
throw $response;
}
}
// Apply rate limit
$cache->save(time() + $timeout, $limitKey);
// Generate results
$result = parent::getContent($key, $callback);
// Reset rate limit with optional cooldown
if($cooldown = \Config::inst()->get(get_class(), 'lock_cooldown')) {
// Set cooldown on successful query execution
$cache->save(time() + $cooldown, $limitKey);
} else {
// Without cooldown simply disable lock
$cache->remove($limitKey);
}
return $result;
}
}

View File

@ -0,0 +1,7 @@
# 1.0.3
## Upgrading
* Caching and rate limiting rules now apply in this version, and it may be necessary to adjust the values
for `RateLimitFilter.lock_timeout`, `RateLimitFilter.lock_bypage` and `RateLimitFilter.lock_byuserip` configs.
See the [Developer Documentation](../developer.md) for information on how these affect cache performance and behaviour.

View File

@ -21,3 +21,33 @@ and returning the URL of your desired RSS feed:
}
This can be used in templates as `$DefaultRSSLink`.
### Rate limiting and caching
By default all content is filtered based on the rules specified in `versionfeed/_config/versionfeed.yml`.
Two filters are applied on top of one another:
* `CachedContentFilter` provides caching of versions based on an identifier built up of the record ID and the
most recently saved version number. There is no configuration required for this class.
* `RateLimitFilter` provides rate limiting to ensure that requests to uncached data does not overload the
server. This filter will only be applied if the `CachedContentFilter` does not have any cached record
for a request.
Either one of these can be replaced, added to, or removed, by adjusting the `VersionFeed_Controller.dependencies`
config to point to a replacement (or no) filter.
For smaller servers where it's reasonable to apply a strict approach to rate limiting the default
settings should be sufficient. The `RateLimitFilter.lock_bypage` config defaults to false, meaning that a
single limit will be applied to all URLs. If set to true, then each URL will have its own rate limit,
and on smaller servers with lots of concurrent requests this can still overwhelm capacity. This will
also leave smaller servers vulnerable to DDoS attacks which target many URLs simultaneously.
This config will have no effect on the `allchanges` method.
`RateLimitFilter.lock_byuserip` can be set to true in order to prevent requests from different users
interfering with one another. However, this can provide an ineffective safeguard against malicious DDoS attacks
which use multiple IP addresses.
Another important variable is the `RateLimitFilter.lock_timeout` config, which is set to 5 seconds by default.
This should be increased on sites which may be slow to generate page versions, whether due to lower
server capacity or volume of content (number of page versions). Requests to this page after the timeout
will not trigger any rate limit safeguard, so you should be sure that this is set to an appropriate level.

View File

@ -5,12 +5,30 @@ class VersionFeedFunctionalTest extends FunctionalTest {
'Page' => array('VersionFeed'),
'Page_Controller' => array('VersionFeed_Controller'),
);
protected $userIP;
public function setUp() {
parent::setUp();
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->clean(Zend_Cache::CLEANING_MODE_ALL);
$this->userIP = isset($_SERVER['HTTP_CLIENT_IP']) ? $_SERVER['HTTP_CLIENT_IP'] : null;
Config::nest();
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_timeout', 20);
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_bypage', false);
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_byuserip', false);
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_cooldown', false);
}
public function tearDown() {
Config::unnest();
$_SERVER['HTTP_CLIENT_IP'] = $this->userIP;
parent::tearDown();
}
public function testPublicHistory() {
@ -36,6 +54,59 @@ class VersionFeedFunctionalTest extends FunctionalTest {
$xml = simplexml_load_string($response->getBody());
$this->assertTrue((bool)$xml->channel->item);
}
public function testRateLimiting() {
$page1 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page1'));
$page2 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page2'));
// Artifically set cache lock
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_byuserip', false);
$cache = SS_Cache::factory('VersionFeed_Controller');
$cache->setOption('automatic_serialization', true);
$cache->save(time() + 10, \VersionFeed\Filters\RateLimitFilter::CACHE_PREFIX);
// Test normal hit
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
$response = $this->get($page2->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
// Test page specific lock
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_bypage', true);
$key = implode('_', array(
'changes',
$page1->ID,
Versioned::get_versionnumber_by_stage('SiteTree', 'Live', $page1->ID, false)
));
$key = \VersionFeed\Filters\RateLimitFilter::CACHE_PREFIX . '_' . md5($key);
$cache->save(time() + 10, $key);
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
$response = $this->get($page2->RelativeLink('changes'));
$this->assertEquals(200, $response->getStatusCode());
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_bypage', false);
// Test rate limit hit by IP
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_byuserip', true);
$_SERVER['HTTP_CLIENT_IP'] = '127.0.0.1';
$cache->save(time() + 10, \VersionFeed\Filters\RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1'));
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(429, $response->getStatusCode());
$this->assertGreaterThan(0, $response->getHeader('Retry-After'));
// Test rate limit doesn't hit other IP
$_SERVER['HTTP_CLIENT_IP'] = '127.0.0.20';
$cache->save(time() + 10, \VersionFeed\Filters\RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1'));
$response = $this->get($page1->RelativeLink('changes'));
$this->assertEquals(200, $response->getStatusCode());
// Restore setting
Config::inst()->update('VersionFeed\Filters\RateLimitFilter', 'lock_byuserip', false);
}
public function testContainsChangesForPageOnly() {
$page1 = $this->createPageWithChanges(array('Title' => 'Page1'));