diff --git a/_config.php b/_config.php index 36ccb11..fdd4107 100644 --- a/_config.php +++ b/_config.php @@ -1,7 +1,4 @@ 'Boolean' + 'PublicHistory' => 'Boolean(true)' ); private static $defaults = array( diff --git a/code/VersionFeed_Controller.php b/code/VersionFeed_Controller.php index a1f22b3..a59507a 100644 --- a/code/VersionFeed_Controller.php +++ b/code/VersionFeed_Controller.php @@ -6,8 +6,39 @@ class VersionFeed_Controller extends Extension { 'changes', 'allchanges' ); + + /** + * Content handler + * + * @var ContentFilter + */ + protected $contentFilter; + + /** + * Sets the content filter + * + * @param ContentFilter $contentFilter + */ + public function setContentFilter(ContentFilter $contentFilter) { + $this->contentFilter = $contentFilter; + } + + /** + * Evaluates the result of the given callback + * + * @param string $key Unique key for this + * @param callable $callback Callback for evaluating the content + * @return mixed Result of $callback() + */ + protected function filterContent($key, $callback) { + if($this->contentFilter) { + return $this->contentFilter->getContent($key, $callback); + } else { + return call_user_func($callback); + } + } - function onAfterInit() { + public function onAfterInit() { // RSS feed for per-page changes. if ($this->owner->PublicHistory) { RSSFeed::linkToFeed($this->owner->Link() . 'changes', @@ -26,18 +57,15 @@ class VersionFeed_Controller extends Extension { /** * Get page-specific changes in a RSS feed. */ - function changes() { - if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404);; + public function changes() { + if(!$this->owner->PublicHistory) throw new SS_HTTPResponse_Exception('Page history not viewable', 404); // Cache the diffs to remove DOS possibility. - $cache = SS_Cache::factory('VersionFeed_Controller'); - $cache->setOption('automatic_serialization', true); + $target = $this->owner; $key = implode('_', array('changes', $this->owner->ID, $this->owner->Version)); - $entries = $cache->load($key); - if(!$entries || isset($_GET['flush'])) { - $entries = $this->owner->getDiffedChanges(); - $cache->save($entries, $key); - } + $entries = $this->filterContent($key, function() use ($target) { + return $target->getDiffedChanges(); + }); // Generate the output. $title = sprintf(_t('RSSHistory.SINGLEPAGEFEEDTITLE', 'Updates to %s page'), $this->owner->Title); @@ -49,42 +77,47 @@ class VersionFeed_Controller extends Extension { /** * Get all changes from the site in a RSS feed. */ - function allchanges() { + public function allchanges() { - $latestChanges = DB::query('SELECT * FROM "SiteTree_versions" WHERE "WasPublished"=\'1\' AND "CanViewType" IN (\'Anyone\', \'Inherit\') AND "ShowInSearch"=1 AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\') ORDER BY "LastEdited" DESC LIMIT 20'); + $latestChanges = DB::query(' + SELECT * FROM "SiteTree_versions" + WHERE "WasPublished" = \'1\' + AND "CanViewType" IN (\'Anyone\', \'Inherit\') + AND "ShowInSearch" = 1 + AND ("PublicHistory" IS NULL OR "PublicHistory" = \'1\') + ORDER BY "LastEdited" DESC LIMIT 20' + ); $lastChange = $latestChanges->record(); $latestChanges->rewind(); if ($lastChange) { // Cache the diffs to remove DOS possibility. - $member = Member::currentUser(); - $cache = SS_Cache::factory('VersionFeed_Controller'); - $cache->setOption('automatic_serialization', true); - $key = 'allchanges' . preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited']) . - ($member ? $member->ID : 'public'); - - $changeList = $cache->load($key); - if(!$changeList || isset($_GET['flush'])) { - + $key = 'allchanges' + . preg_replace('#[^a-zA-Z0-9_]#', '', $lastChange['LastEdited']) + . (Member::currentUserID() ?: 'public'); + $changeList = $this->filterContent($key, function() use ($latestChanges) { $changeList = new ArrayList(); - + $canView = array(); foreach ($latestChanges as $record) { + // Check if the page should be visible. // WARNING: although we are providing historical details, we check the current configuration. - $page = SiteTree::get()->filter(array('ID'=>$record['RecordID']))->First(); - if (!$page->canView(new Member())) continue; + $id = $record['RecordID']; + if(!isset($canView[$id])) { + $page = SiteTree::get()->byID($id); + $canView[$id] = $page && $page->canView(new Member()); + } + if (!$canView[$id]) continue; // Get the diff to the previous version. $version = new Versioned_Version($record); - $changes = $version->getDiffedChanges($version->Version, false); if ($changes && $changes->Count()) $changeList->push($changes->First()); } - $cache->save($changeList, $key); - } - + return $changeList; + }); } else { $changeList = new ArrayList(); } diff --git a/code/caching/CachedContentFilter.php b/code/caching/CachedContentFilter.php new file mode 100644 index 0000000..7173379 --- /dev/null +++ b/code/caching/CachedContentFilter.php @@ -0,0 +1,22 @@ +getCache(); + + // Return cached value if available + $result = isset($_GET['flush']) + ? null + : $cache->load($key); + if($result) return $result; + + // Fallback to generate result + $result = parent::getContent($key, $callback); + $cache->save($result, $key); + return $result; + } +} diff --git a/code/caching/ContentFilter.php b/code/caching/ContentFilter.php new file mode 100644 index 0000000..f0e5cb4 --- /dev/null +++ b/code/caching/ContentFilter.php @@ -0,0 +1,45 @@ +nestedContentFilter = $nestedContentFilter; + } + + /** + * Gets the cache to use + * + * @return Zend_Cache_Frontend + */ + protected function getCache() { + $cache = SS_Cache::factory('VersionFeed_Controller'); + $cache->setOption('automatic_serialization', true); + return $cache; + } + + /** + * Evaluates the result of the given callback + * + * @param string $key Unique key for this + * @param callable $callback Callback for evaluating the content + * @return mixed Result of $callback() + */ + public function getContent($key, $callback) { + if($this->nestedContentFilter) { + return $this->nestedContentFilter->getContent($key, $callback); + } else { + return call_user_func($callback); + } + } +} diff --git a/code/caching/RateLimitFilter.php b/code/caching/RateLimitFilter.php new file mode 100644 index 0000000..181d2af --- /dev/null +++ b/code/caching/RateLimitFilter.php @@ -0,0 +1,92 @@ +get(get_class(), 'lock_bypage')) { + $key .= '_' . md5($itemkey); + } + + // Add user-specific identifier + if(Config::inst()->get(get_class(), 'lock_byuserip') && Controller::has_curr()) { + $ip = Controller::curr()->getRequest()->getIP(); + $key .= '_' . md5($ip); + } + + return $key; + } + + + public function getContent($key, $callback) { + // Bypass rate limiting if flushing, or timeout isn't set + $timeout = Config::inst()->get(get_class(), 'lock_timeout'); + if(isset($_GET['flush']) || !$timeout) { + return parent::getContent($key, $callback); + } + + // Generate result with rate limiting enabled + $limitKey = $this->getCacheKey($key); + $cache = $this->getCache(); + if($cacheBegin = $cache->load($limitKey)) { + if(time() - $cacheBegin < $timeout) { + // Politely inform visitor of limit + $response = new SS_HTTPResponse_Exception('Too Many Requests.', 429); + $response->getResponse()->addHeader('Retry-After', 1 + time() - $cacheBegin); + throw $response; + } + } + + // Generate result with rate limit locked + $cache->save(time(), $limitKey); + $result = parent::getContent($key, $callback); + $cache->remove($limitKey); + return $result; + } +} diff --git a/docs/en/changelogs/1.0.3.md b/docs/en/changelogs/1.0.3.md new file mode 100644 index 0000000..3e12d28 --- /dev/null +++ b/docs/en/changelogs/1.0.3.md @@ -0,0 +1,7 @@ +# 1.0.3 + +## Upgrading + + * Caching and rate limiting rules now apply in this version, and it may be necessary to adjust the values + for `RateLimitFilter.lock_timeout`, `RateLimitFilter.lock_bypage` and `RateLimitFilter.lock_byuserip` configs. + See the [Developer Documentation](../developer.md) for information on how these affect cache performance and behaviour. diff --git a/docs/en/developer.md b/docs/en/developer.md index f046ea1..0173656 100644 --- a/docs/en/developer.md +++ b/docs/en/developer.md @@ -21,3 +21,35 @@ and returning the URL of your desired RSS feed: } This can be used in templates as `$DefaultRSSLink`. + +### Rate limiting and caching + +By default all content is filtered based on the rules specified in `versionfeed/_config/versionfeed.yml`. +Two filters are applied on top of one another: + + * `CachedContentFilter` provides caching of versions based on an identifier built up of the record ID and the + most recently saved version number. There is no configuration required for this class. + * `RateLimitFilter` provides rate limiting to ensure that requests to uncached data does not overload the + server. This filter will only be applied if the `CachedContentFilter` does not have any cached record + for a request. + +Either one of these can be replaced, added to, or removed, by adjusting the `VersionFeed_Controller.dependencies` +config to point to a replacement (or no) filter. + +For large servers with a high level of traffic (more than 1 visits every 10 seconds) the default settings should +be sufficient. + +For smaller servers where it's reasonable to apply a more strict approach to rate limiting, consider setting the +`RateLimitFilter.lock_bypage` config to false, meaning that a single limit will be applied to all URLs. +If left on true, then each URL will have its own rate limit, and on smaller servers with lots of +concurrent requests this can still overwhelm capacity. This will also leave smaller servers vulnerable to DDoS +attacks which target many URLs simultaneously. This config will have no effect on the `allchanges` method. + +`RateLimitFilter.lock_byuserip` can also be set to true in order to prevent requests from different users +interfering with one another. However, this can provide an ineffective safeguard against malicious DDoS attacks +which use multiple IP addresses. + +Another important variable is the `RateLimitFilter.lock_timeout` config, which is set to 10 seconds by default. +This should be increased on sites which may be slow to generate page versions, whether due to lower +server capacity or volume of content (number of page versions). Requests to this page after the timeout +will not trigger any rate limit safeguard, so you should be sure that this is set to an appropriate level. diff --git a/tests/VersionFeedFunctionalTest.php b/tests/VersionFeedFunctionalTest.php index ce899a1..bb8ea71 100644 --- a/tests/VersionFeedFunctionalTest.php +++ b/tests/VersionFeedFunctionalTest.php @@ -5,12 +5,29 @@ class VersionFeedFunctionalTest extends FunctionalTest { 'Page' => array('VersionFeed'), 'Page_Controller' => array('VersionFeed_Controller'), ); + + protected $userIP; public function setUp() { parent::setUp(); $cache = SS_Cache::factory('VersionFeed_Controller'); $cache->clean(Zend_Cache::CLEANING_MODE_ALL); + + $this->userIP = isset($_SERVER['HTTP_CLIENT_IP']) ? $_SERVER['HTTP_CLIENT_IP'] : null; + + Config::nest(); + Config::inst()->update('RateLimitFilter', 'lock_timeout', 20); + Config::inst()->update('RateLimitFilter', 'lock_bypage', false); + Config::inst()->update('RateLimitFilter', 'lock_byuserip', false); + } + + public function tearDown() { + Config::unnest(); + + $_SERVER['HTTP_CLIENT_IP'] = $this->userIP; + + parent::tearDown(); } public function testPublicHistory() { @@ -36,6 +53,59 @@ class VersionFeedFunctionalTest extends FunctionalTest { $xml = simplexml_load_string($response->getBody()); $this->assertTrue((bool)$xml->channel->item); } + + public function testRateLimiting() { + + $page1 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page1')); + $page2 = $this->createPageWithChanges(array('PublicHistory' => true, 'Title' => 'Page2')); + + // Artifically set cache lock + Config::inst()->update('RateLimitFilter', 'lock_byuserip', false); + $cache = SS_Cache::factory('VersionFeed_Controller'); + $cache->setOption('automatic_serialization', true); + $cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX); + + // Test normal hit + $response = $this->get($page1->RelativeLink('changes')); + $this->assertEquals(429, $response->getStatusCode()); + $this->assertGreaterThan(0, $response->getHeader('Retry-After')); + $response = $this->get($page2->RelativeLink('changes')); + $this->assertEquals(429, $response->getStatusCode()); + $this->assertGreaterThan(0, $response->getHeader('Retry-After')); + + // Test page specific lock + Config::inst()->update('RateLimitFilter', 'lock_bypage', true); + $key = implode('_', array( + 'changes', + $page1->ID, + Versioned::get_versionnumber_by_stage('SiteTree', 'Live', $page1->ID, false) + )); + $key = RateLimitFilter::CACHE_PREFIX . '_' . md5($key); + $cache->save(time() - 2, $key); + $response = $this->get($page1->RelativeLink('changes')); + $this->assertEquals(429, $response->getStatusCode()); + $this->assertGreaterThan(0, $response->getHeader('Retry-After')); + $response = $this->get($page2->RelativeLink('changes')); + $this->assertEquals(200, $response->getStatusCode()); + Config::inst()->update('RateLimitFilter', 'lock_bypage', false); + + // Test rate limit hit by IP + Config::inst()->update('RateLimitFilter', 'lock_byuserip', true); + $_SERVER['HTTP_CLIENT_IP'] = '127.0.0.1'; + $cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1')); + $response = $this->get($page1->RelativeLink('changes')); + $this->assertEquals(429, $response->getStatusCode()); + $this->assertGreaterThan(0, $response->getHeader('Retry-After')); + + // Test rate limit doesn't hit other IP + $_SERVER['HTTP_CLIENT_IP'] = '127.0.0.20'; + $cache->save(time() - 2, RateLimitFilter::CACHE_PREFIX . '_' . md5('127.0.0.1')); + $response = $this->get($page1->RelativeLink('changes')); + $this->assertEquals(200, $response->getStatusCode()); + + // Restore setting + Config::inst()->update('RateLimitFilter', 'lock_byuserip', false); + } public function testContainsChangesForPageOnly() { $page1 = $this->createPageWithChanges(array('Title' => 'Page1'));