Merge pull request #811 from wilr/removestatic

API: Remove static main and dev/buildcache
This commit is contained in:
Sean Harvey 2012-09-24 19:12:35 -07:00
commit 2654290848
5 changed files with 3 additions and 394 deletions

View File

@ -1,7 +1,7 @@
<FilesMatch "\.(php|php3|php4|php5|phtml|inc)$">
Deny from all
</FilesMatch>
<FilesMatch "(main|static-main|rpc|tiny_mce_gzip)\.php$">
<FilesMatch "(main|rpc|tiny_mce_gzip)\.php$">
Allow from all
</FilesMatch>
<FilesMatch "silverstripe_version$">

View File

@ -76,7 +76,6 @@ class DevelopmentAdmin extends Controller {
public function index() {
$actions = array(
"build" => "Build/rebuild this environment. Call this whenever you have updated your project sources",
"buildcache" => "Rebuild the static cache, if you're using StaticPublisher",
"tests" => "See a list of unit tests to run",
"tests/all" => "Run all tests",
"tests/startsession" => "Start a test session in your browser (gives you a temporary database with default content)",

View File

@ -1,283 +0,0 @@
# Static Publisher
## Introduction
Many sites get too much traffic to justify dynamically sending every request. Caching is needed. Static Publishing
will generate static versions of your content (HTML) that can be served without ever hitting PHP or the Database.
See `[api:StaticExporter]` for a less flexible, but easier way of building a local static cache from all of
your pages.
See [Partial-Caching](partial-caching) for a much more flexible way of building in caching without statically delivering
content. Partial Caching is recommended as a basic enhancement to any SilverStripe site. However, if your site is expecting
a vast amount of traffic (e.g. an article makes the front page of Digg), then Static Publisher will be appropriate.
## Usage
SilverStripe doesn't have enough information about your template and data-structures to automatically determine which
URLs need to be cached, and at which time they are considered outdated. By adding a custom method allPagesToCache() to
your Page class, you can determine which URLs need caching, and hook in custom logic. This array of URLs is used by the
publisher to generate folders and HTML-files.
:::php
class Page extends SiteTree {
// ...
/**
* Return a list of all the pages to cache
*/
public function allPagesToCache() {
// Get each page type to define its sub-urls
$urls = array();
// memory intensive depending on number of pages
$pages = SiteTree::get();
foreach($pages as $page) {
$urls = array_merge($urls, (array)$page->subPagesToCache());
}
// add any custom URLs which are not SiteTree instances
$urls[] = "sitemap.xml";
return $urls;
}
/**
* Get a list of URLs to cache related to this page
*/
public function subPagesToCache() {
$urls = array();
// add current page
$urls[] = $this->Link();
// cache the RSS feed if comments are enabled
if ($this->ProvideComments) {
$urls[] = Director::absoluteBaseURL() . "pagecomment/rss/" . $this->ID;
}
return $urls;
}
public function pagesAffectedByChanges() {
$urls = $this->subPagesToCache();
if($p = $this->Parent) $urls = array_merge((array)$urls, (array)$p->subPagesToCache());
return $urls;
}
}
## Excluding Pages
The allPagesToCache function returns all the URLs that need to be cached. To exclude specific pages from the
cache, unset their URLs from the returned array. If you do not want to cache a specific class (e.g. UserDefinedForm),
you can also add an exclusion:
:::php
public function allPagesToCache() {
$urls = array();
$pages = SiteTree::get();
// ignored page types
$ignored = array('UserDefinedForm');
foreach($pages as $page) {
// check to make sure this page is not in the classname
if(!in_array($page->ClassName, $ignored)) {
$urls = array_merge($urls, (array)$page->subPagesToCache());
}
}
return $urls;
}
You can also pass the filtering to the original `SiteTree::get()`:
:::php
public function allPagesToCache() {
$urls = array();
$pages = SiteTree::get()->where("ClassName != 'UserDefinedForm'");
...
## Single server Caching
This setup will store the cached content on the same server as the CMS. This is good for a basic performance enhancement.
### Setup
Put this in mysite/_config.php. This will create static content in a "cache/" subdirectory, with an HTML suffix.
:::php
Object::add_extension("SiteTree", "FilesystemPublisher('cache/', 'html')");
* Put this into your .htaccess. It will serve requests from the cache, statically, if the cache file exists. Replace
**sitedir** with the subdirectory that you would like to serve the site from (for example, in your dev environment).
[View .htaccess
example](http://open.silverstripe.com/browser/modules/cms/trunk/code/staticpublisher/htaccess_example_rsyncsingleserver)
* We use a simple PHP script, static-main.php, to control cache lookup. This makes the .htaccess update simpler.
Just look for this line:
RewriteRule .* framework/main.php?url=%1&%{QUERY_STRING} [L]
And change the PHP script from main.php to static-main.php:
RewriteRule .* framework/static-main.php?url=%1&%{QUERY_STRING} [L]
## Using Static Publisher With Subsites Module
Append the following code to mysite/_config.php
:::php
FilesystemPublisher::$domain_based_caching = true;
Instead of the above code snippet for Page.php, use the following code:
:::php
class Page extends SiteTree {
// ...
public function allPagesToCache() {
// Get each page type to define its sub-urls
$urls = array();
// memory intensive depending on number of pages
$pages = Subsite::get_from_all_subsites("SiteTree");
foreach($pages as $page) {
$urls = array_merge($urls, (array)$page->subPagesToCache());
}
return $urls;
}
public function subPagesToCache() {
$urls = array();
$urls[] = $this->AbsoluteLink();
return $urls;
}
public function pagesAffectedByChanges() {
$urls = $this->subPagesToCache();
if($p = $this->Parent) $urls = array_merge((array)$urls, (array)$p->subPagesToCache());
return $urls;
}
// ... some other code ...
}
The last thing you need to do is add your main site's host mapping to subsites/host-map.php. For example, if your
main site's host is mysite.com, the content of the file would be:
:::php
<?php
$subsiteHostmap = array (
// .. subsite host mapping ..,
'mysite.com' => 'mysite.com'
);
Remember that you need to add the main site's host mapping every time a subsite is added or modified, because the operation
overwrites your manual modifications to the file and the subsites module does not add the main site's host mapping
automatically at the moment.
Another note on the host-map.php file: it does not exist until you have created at least one subsite.
## Multiple Server Caching
In this setup, you have one server that is your dynamic CMS server, and one or more separate servers that are
responsible for serving static content. The publication system on the CMS will rsync changes to the static content
servers as needed. No PHP files will be synced to the static content servers unless explicitly requested. All static
assets (images, javascript, etc.) will be rsynced from their original locations. You can then put a load-balancer on the
front of the static content servers.
This approach is very secure, because you can lock the CMS right down (for example, by IP) and hide all the PHP code
away from potential hackers. It is also good for high-traffic situations.
### Setup
Add the RsyncMultiHostPublisher extension to your SiteTree objects in mysite/_config.php. This will create static
content in a "cache/" subdirectory, with an HTML suffix.
:::php
Object::add_extension("SiteTree", "RsyncMultiHostPublisher('cache/', 'html')");
RsyncMultiHostPublisher::set_targets(array(
'<rsyncuser>@<static-server1>:<webroot>',
'<rsyncuser>@<static-server2>:<webroot>',
));
Where `<rsyncuser>` is a unix account with write permissions to `<webroot>` (e.g. `/var/www`), and
`<static-server1>` and `<static-server2>` are the names of your static content servers. The number of servers is
flexible and depends on your infrastructure and scalability needs.
* Ensure that the `rsync` unix tool is installed on the CMS server, and ssh access is enabled on the static content
servers.
* No password can be specified for the SSH connection. The class assumes key-based authentication without requiring
a password for the username specified in `<rsyncuser>` (see this
[tutorial](http://www.csua.berkeley.edu/~ranga/notes/ssh_nopass.html)).
* Put the .htaccess file linked below into the webroot of each static content server (and rename it to `.htaccess`).
It will serve requests from the cache, statically, if the cache file exists. Replace **sitedir** with the
subdirectory that you would like to serve the site from (for example, in your dev environment).
[View .htaccess
example](http://open.silverstripe.com/browser/modules/cms/trunk/code/staticpublisher/htaccess_example_rsyncmultiservers)
## Cache Control
There is also the option to wrap some PHP logic around the static HTML content served by the content servers, which can
greatly reduce the bandwidth required on your content servers. This code takes care of cache control through HTTP
headers (`Cache-Control`, `If-Modified-Since`), meaning the files will only be delivered if they changed since the
browser client last requested them. The last modification date for each static file is controlled by the publication
script, meaning the cache gets invalidated on each publication.
To enable cache control, specify "php" instead of "html" in the RsyncMultiHostPublisher definition.
:::php
Object::add_extension("SiteTree", "RsyncMultiHostPublisher('cache/', 'php')");
And use this slightly different .htaccess file. Make sure that index.php can be used as a directory index!
[View .htaccess
example](http://open.silverstripe.com/browser/modules/cms/trunk/code/staticpublisher/htaccess_example_rsyncwithphp)
## Deployment
Once you've set up your rewrite rules and defined which pages need caching, you can build the static HTML files. This is
done by the `[api:RebuildStaticCacheTask]`
Execution via URL
http://www.example.com/dev/buildcache?flush=1
Execution on CLI (via [sake](/topics/commandline))
sake dev/buildcache flush=1
Depending on which extension you've set up for your SiteTree (FilesystemPublisher or RsyncMultiHostPublisher), the
method publishPages() either stores the generated HTML-files on the server's filesystem, or deploys them to other
servers via rsync.
It is advisable to set dev/buildcache up as an automated task (e.g. unix cron) which continually rebuilds and redeploys
the cache.
## Related
* `[api:StaticExporter]`
* [Partial-Caching](partial-caching)
## API Documentation
* `[api:StaticPublisher]`

View File

@ -103,6 +103,8 @@ if (substr(strtolower($url), 0, strlen(BASE_URL)) == strtolower(BASE_URL)) $url
// Connect to database
require_once('model/DB.php');
global $databaseConfig;
// Redirect to the installer if no database is selected
if(!isset($databaseConfig) || !isset($databaseConfig['database']) || !$databaseConfig['database']) {
if(!file_exists(BASE_PATH . '/install.php')) {

View File

@ -1,109 +0,0 @@
<?php
/**
 * This file is designed to be the new 'server' of sites using StaticPublisher.
 * To use this, you need to modify your .htaccess to point all requests to
 * static-main.php, rather than main.php. This file also allows for using
 * static publisher with the subsites module.
 *
 * If you are using StaticPublisher+Subsites, set the following in _config.php:
 *   FilesystemPublisher::$domain_based_caching = true;
 * and add the main site host mapping in subsites/host-map.php every time a new
 * subsite is created or modified.
 *
 * If you are not using subsites, the host-map.php file will not exist (it is
 * automatically generated by the Subsites module) and the cache will default
 * to no subdirectory.
 *
 * Request flow:
 *  - If caching is disabled, a 'bypassStaticCache' cookie is set, the request is a
 *    POST, or any GET parameter other than 'url' / 'cacheSubdir' is present, the
 *    request is handed straight to main.php.
 *  - Otherwise the requested URL is mapped to "<cacheBaseDir><cacheDir><file>.html"
 *    or ".php"; a hit is served directly, a miss falls back to main.php.
 */

$cacheEnabled = true;
$cacheDebug = false;
$cacheBaseDir = '../cache/'; // Should point to the same folder as FilesystemPublisher->destFolder

// Optional settings for FilesystemPublisher::$domain_based_caching=TRUE
$hostmapLocation = '../subsites/host-map.php';

// Specific to 'homepagefordomain' module
$homepageMapLocation = '../assets/_homepage-map.php';

if (
	$cacheEnabled
	&& empty($_COOKIE['bypassStaticCache'])
	// No GET params other than cache relevant config is passed (e.g. "?stage=Stage"),
	// which would mean that we have to bypass the cache
	&& count(array_diff(array_keys($_GET), array('url', 'cacheSubdir'))) == 0
	// Request is not POST (which would have to be handled dynamically)
	&& count($_POST) == 0
) {
	// Define system paths (copied from Core.php)
	if(!defined('BASE_PATH')) {
		// Assuming that this file is framework/static-main.php we can then determine the base path.
		// DIRECTORY_SEPARATOR is the character mask for rtrim(), not an argument of define().
		define('BASE_PATH', rtrim(dirname(dirname(__FILE__)), DIRECTORY_SEPARATOR));
	}
	if(!defined('BASE_URL')) {
		// Determine the base URL by comparing SCRIPT_NAME to SCRIPT_FILENAME and getting common elements
		$path = realpath($_SERVER['SCRIPT_FILENAME']);
		if(substr($path, 0, strlen(BASE_PATH)) == BASE_PATH) {
			$urlSegmentToRemove = substr($path, strlen(BASE_PATH));
			if(substr($_SERVER['SCRIPT_NAME'], -strlen($urlSegmentToRemove)) == $urlSegmentToRemove) {
				$baseURL = substr($_SERVER['SCRIPT_NAME'], 0, -strlen($urlSegmentToRemove));
				define('BASE_URL', rtrim($baseURL, DIRECTORY_SEPARATOR));
			}
		}
	}

	// A missing or non-string 'url' parameter is treated as a request for the site root
	$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

	// Remove base folders from the URL if webroot is hosted in a subfolder.
	// BASE_URL may still be undefined here if the detection above did not match.
	if (
		defined('BASE_URL')
		&& BASE_URL !== ''
		&& substr(strtolower($url), 0, strlen(BASE_URL)) == strtolower(BASE_URL)
	) {
		$url = substr($url, strlen(BASE_URL));
	}

	// HTTP_HOST is client supplied and may be absent
	$httpHost = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
	$host = str_replace('www.', '', $httpHost);

	// Custom cache dir for debugging purposes (restricted to [a-zA-Z0-9-_])
	if (
		isset($_GET['cacheSubdir'])
		&& is_string($_GET['cacheSubdir'])
		&& !preg_match('/[^a-zA-Z0-9\-_]/', $_GET['cacheSubdir'])
	) {
		$cacheDir = $_GET['cacheSubdir'].'/';
	}
	// Custom mapping through PHP file (assumed FilesystemPublisher::$domain_based_caching=TRUE)
	else if (file_exists($hostmapLocation)) {
		// The included file is expected to define $subsiteHostmap
		include_once $hostmapLocation;
		$subsiteHostmap['default'] = isset($subsiteHostmap['default']) ? $subsiteHostmap['default'] : '';
		$cacheDir = (isset($subsiteHostmap[$host]) ? $subsiteHostmap[$host] : $subsiteHostmap['default']) . '/';
	}
	// No subfolder (for FilesystemPublisher::$domain_based_caching=FALSE)
	else {
		$cacheDir = '';
	}

	// Look for the file in the cachedir
	$file = trim($url, '/');
	$file = $file ? $file : 'index';

	// Route to the 'correct' index file (if applicable)
	if ($file == 'index' && file_exists($homepageMapLocation)) {
		// The included file is expected to define $homepageMap
		include_once $homepageMapLocation;
		$file = isset($homepageMap[$httpHost]) ? $homepageMap[$httpHost] : $file;
	}

	// rawurlencode() leaves ".." untouched, so a request such as "url=../mysite/_config"
	// would otherwise resolve outside of the cache folder and be read or included below.
	// Such requests are never served from the cache.
	$segments = explode('/', $file);
	$escapesCacheDir = in_array('..', $segments, true);

	// Encode each part of the path individually, in order to support multibyte paths.
	// SiteTree.URLSegment and hence the static folder and filenames are stored in encoded form,
	// to avoid filesystem incompatibilities.
	$file = implode('/', array_map('rawurlencode', $segments));
	$cachePath = $cacheBaseDir . $cacheDir . $file;

	// Find file by extension (either *.html or *.php)
	if (!$escapesCacheDir && file_exists($cachePath . '.html')) {
		header('X-SilverStripe-Cache: hit at '.@date('r'));
		echo file_get_contents($cachePath . '.html');
		if ($cacheDebug) echo "<h1>File was cached</h1>";
	} elseif (!$escapesCacheDir && file_exists($cachePath . '.php')) {
		header('X-SilverStripe-Cache: hit at '.@date('r'));
		include_once $cachePath . '.php';
		if ($cacheDebug) echo "<h1>File was cached</h1>";
	} else {
		header('X-SilverStripe-Cache: miss at '.@date('r') . ' on ' . $cacheDir . $file);
		// No cache hit... fallback to dynamic routing
		include 'main.php';
		if ($cacheDebug) echo "<h1>File was NOT cached</h1>";
	}
} else {
	// Fall back to dynamic generation via normal routing if caching has been explicitly disabled
	include 'main.php';
}