diff --git a/docs/_manifest_exclude b/docs/_manifest_exclude new file mode 100644 index 0000000..e69de29 diff --git a/docs/en/StaticExporter.md b/docs/en/StaticExporter.md new file mode 100644 index 0000000..a6b0c1e --- /dev/null +++ b/docs/en/StaticExporter.md @@ -0,0 +1,44 @@ +# Static Exporter + +## Introduction + +StaticExporter allows you to export a static copy of your website either as a +tar.gz archive or to a separate folder. It does this by saving every page and +other registered URL to the file system. You can then serve the exported +website on your production server or use it as a backup system. + +## Requirements + + - Unix filesystem + - Tar installed + +
+ This has not been tested on Windows +
+ +## Usage + +There are three ways the StaticExporter can be invoked depending on your use +case. + +### GUI + +If you're logged into your site as an administrator or your website is in +development mode, you can access the GUI for generating the export at: +http://yoursite.com/StaticExporter/. The GUI allows you to select a few +configuration options and will then generate a tar.gz archive of the website. + +### StaticExporterTask + +Accessing http://yoursite.com/dev/tasks/StaticExporterTask will generate the +export of the website and save it to a folder on your filesystem. Unlike the +GUI option, this does not allow you to configure options in the browser; instead, +it relies on the developer setting the options via statics (as deploy). + +### Sake + +To generate the export via command line ([sake](framework/en/topics/commandline.md)): + + sake dev/tasks/StaticExporterTask + +## Options \ No newline at end of file diff --git a/docs/en/StaticPublisher.md b/docs/en/StaticPublisher.md new file mode 100644 index 0000000..0e86983 --- /dev/null +++ b/docs/en/StaticPublisher.md @@ -0,0 +1,432 @@ +# Static Publisher + +## Introduction + +Many sites get too much traffic to justify dynamically sending every request +directly from SilverStripe. Caching is used to avoid hitting SilverStripe or +the database. + +Static Publishing will generate static versions of your content (HTML, XML) +whenever you publish a web page in the CMS and allow you to publish the page +as HTML. + +See [StaticExporter](StaticExporter) for a less flexible, but easier way of building a +local static cache from all of your pages. + +See [Partial-Caching](partial-caching) for a much more flexible way of building +in caching without delivering static content. Partial Caching is recommended as +a basic enhancement to any SilverStripe site; however, if your site is expecting a +vast amount of traffic (eg an article is being dug) then Static Publisher will +be appropriate. 
+ +## Usage + +SilverStripe doesn't have enough information about your template and data +structures to automatically determine which URLs need to be cached, and at +which time they are considered outdated. By adding a custom method +`allPagesToCache()` to your Page class, you can determine which URLs need +caching, and hook in custom logic. This array of URLs is used by the publisher +to generate folders and HTML files. + + :::php + class Page extends SiteTree { + // ... + + /** + + * Return a list of all the pages to cache + */ + public function allPagesToCache() { + // Get each page type to define its sub-urls + $urls = array(); + + // memory intensive depending on number of pages + $pages = SiteTree::get(); + + foreach($pages as $page) { + $urls = array_merge($urls, (array)$page->subPagesToCache()); + } + + // add any custom URLs which are not SiteTree instances + $urls[] = "sitemap.xml"; + + return $urls; + } + + /** + + * Get a list of URLs to cache related to this page + */ + public function subPagesToCache() { + $urls = array(); + + // add current page + $urls[] = $this->Link(); + + // cache the RSS feed if comments are enabled + if ($this->ProvideComments) { + $urls[] = Director::absoluteBaseURL() . "pagecomment/rss/" . $this->ID; + } + + return $urls; + } + + public function pagesAffectedByChanges() { + $urls = $this->subPagesToCache(); + if($p = $this->Parent) $urls = array_merge((array)$urls, (array)$p->subPagesToCache()); + return $urls; + } + } + +## Excluding Pages + +The `allPagesToCache()` function returns all the URLs that need to be cached. So if you +want to exclude specific pages from the cache, unset these URLs from +the returned array. If you do not want to cache a specific class (eg +UserDefinedForm) you can also add an exclusion: + + :::php + class Page extends SiteTree { + // .. 
+ + public function allPagesToCache() { + $urls = array(); + $pages = SiteTree::get(); + + // ignored page types + $ignored = array('UserDefinedForm'); + + foreach($pages as $page) { + // check to make sure this page is not in the classname + if(!in_array($page->ClassName, $ignored)) { + $urls = array_merge($urls, (array)$page->subPagesToCache()); + } + } + + return $urls; + } + +You can also pass the filtering to the original `SiteTree::get()`: + + :::php + public function allPagesToCache() { + $urls = array(); + $pages = SiteTree::get()->where("ClassName != 'UserDefinedForm'"); + ... + +## Single server Caching + +This setup will store the cached content on the same server as the CMS. This +is good for a basic performance enhancement. + +### Setup + +Put this in mysite/_config.php. This will create static content in a +"cache/" subdirectory, with an HTML suffix. + + :::php + Object::add_extension("SiteTree", "FilesystemPublisher('cache/', 'html')"); + + +* Put this into your .htaccess. It will serve requests from the cache, +statically, if the cache file exists. Replace **sitedir** with the +subdirectory that you would like to serve the site from (for example, in +your dev environment). 
+ + RewriteEngine On + + ## CONFIG FOR DEV ENVIRONMENTS + + # Cached content - **sitedir** subdirectory + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/(.*)$ + RewriteCond %{REQUEST_URI} /**sitedir**/(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/**sitedir**/cache/%1.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/%1.html [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/?$ + RewriteCond /**sitedir**/cache/index.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/index.html [L] + + ## CONFIG FOR TEST/LIVE ENVIRONMENTS + + # Cached content - live webserver + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} /(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/%1.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/%1.html [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/index.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/index.html [L] + + ## DYNAMIC CONFIG + + # Dynamic content + RewriteCond %{REQUEST_URI} !(\.gif)|(\.jpg)|(\.png)|(\.css)|(\.js)|(\.php)$ + RewriteCond %{REQUEST_URI} ^(.*)$ + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* static/code/main.php?url=%1&%{QUERY_STRING} [L] + ### SILVERSTRIPE END ### + + +* We use a simple PHP script, static-main.php, to control cache lookup. This +makes the .htaccess update simpler. 
+ +Just look for this line: + + RewriteRule .* framework/main.php?url=%1&%{QUERY_STRING} [L] + +And change the PHP script from main.php to static-main.php: + + RewriteRule .* static/code/main.php?url=%1&%{QUERY_STRING} [L] + +## Using Static Publisher With Subsites Module + +Append the following code to mysite/_config.php: + + :::php + FilesystemPublisher::$domain_based_caching = true; + + +Instead of the above code snippet for Page.php, use the following code: + + :::php + class Page extends SiteTree { + + // ... + + public function allPagesToCache() { + // Get each page type to define its sub-urls + $urls = array(); + + // memory intensive depending on number of pages + $pages = Subsite::get_from_all_subsites("SiteTree"); + + foreach($pages as $page) { + $urls = array_merge($urls, (array)$page->subPagesToCache()); + } + + return $urls; + } + + public function subPagesToCache() { + $urls = array(); + $urls[] = $this->AbsoluteLink(); + return $urls; + } + + public function pagesAffectedByChanges() { + $urls = $this->subPagesToCache(); + if($p = $this->Parent) $urls = array_merge((array)$urls, (array)$p->subPagesToCache()); + return $urls; + } + + // ... some other code ... + + } + + +The last thing you need to do is add your main site's host mapping to +subsites/host-map.php. For example, if your main site's host is mysite.com, the +content of the file would be: + + :::php + <?php + $subsiteHostmap = array( + // .. subsite hosts + 'mysite.com' => 'mysite.com', + ); + +## Multiple Server Caching + +In this setup, one server is your dynamic CMS server, and one or more separate +servers are responsible for serving static content. The publication system on +the CMS will rsync changes to the static content servers as needed. + +### Setup + +* Add the RsyncMultiHostPublisher extension to your SiteTree objects in +mysite/_config.php. This will create static content in a "cache/" subdirectory, +with an HTML suffix. + + :::php + Object::add_extension("SiteTree", "RsyncMultiHostPublisher('cache/', 'html')"); + RsyncMultiHostPublisher::set_targets(array( + '<rsyncuser>@<static-server1>:<webroot>', + '<rsyncuser>@<static-server2>:<webroot>', + )); + + +Where `<rsyncuser>` is a unix account with write permissions to `<webroot>` +(e.g. `/var/www`), and `<static-server1>` and `<static-server2>` are the names +of your static content servers. The number of servers is flexible and depends +on your infrastructure and scalability needs. + +* Ensure that the `rsync` unix tool is installed on the CMS server, and ssh +access is enabled on the static content servers. + +* No password can be specified for the SSH connection.
The class assumes a +key-based authentication without requiring a password for the username +specified in `<rsyncuser>` (see +[this tutorial](http://www.csua.berkeley.edu/~ranga/notes/ssh_nopass.html)). + +* Put the .htaccess file linked below into the webroot of each static content +server (and rename it to `.htaccess`). It will serve requests from the cache, +statically, if the cache file exists. Replace **sitedir** with the +subdirectory that you would like to serve the site from (for example, in your +dev environment). + + ### SILVERSTRIPE START ### + RewriteEngine On + + ## CONFIG FOR DEV ENVIRONMENTS + + # Cached content - **sitedir** subdirectory + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/(.*)$ + RewriteCond %{REQUEST_URI} /**sitedir**/(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/**sitedir**/cache/%1.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/%1.html [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/?$ + RewriteCond /**sitedir**/cache/index.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/index.html [L] + + ## CONFIG FOR TEST/LIVE ENVIRONMENTS + + # Cached content - live webserver + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} /(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/%1.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/%1.html [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/index.html -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/index.html [L] + +## Cache Control + +There is also the option to wrap some PHP logic around the static HTML 
content +served by the content servers, which can greatly reduce the bandwidth required +on your content servers. This code takes care of cache control through HTTP +headers (`Cache-Control`, `If-Modified-Since`), meaning the files will only +be delivered if they have changed since the browser client last requested them. The +last modification date for each static file is controlled by the publication +script, meaning the cache gets invalidated on each publication. + +To enable cache control, specify "php" instead of "html" in the +RsyncMultiHostPublisher definition. + + :::php + Object::add_extension("SiteTree", "RsyncMultiHostPublisher('cache/', 'php')"); + + +And use this slightly different .htaccess file. Make sure that index.php can be +used as a directory index in your Apache (`DirectoryIndex`) or nginx (`index`) +configuration. + + ### SILVERSTRIPE START ### + RewriteEngine On + + ## CONFIG FOR DEV ENVIRONMENTS + + # Cached content - **sitedir** subdirectory + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/(.*)$ + RewriteCond %{REQUEST_URI} /**sitedir**/(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/**sitedir**/cache/%1.php -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/%1.php [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} ^/**sitedir**/?$ + RewriteCond /**sitedir**/cache/index.php -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /**sitedir**/cache/index.php [L] + + ## CONFIG FOR TEST/LIVE ENVIRONMENTS + + # Cached content - live webserver + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond %{REQUEST_URI} /(.*[^/])/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/%1.php -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/%1.php [L] + + # Cached content - homepage + RewriteCond %{REQUEST_METHOD} ^GET$ + RewriteCond %{QUERY_STRING} ^$ + RewriteCond 
%{REQUEST_URI} ^/?$ + RewriteCond %{DOCUMENT_ROOT}/cache/index.php -f + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule .* /cache/index.php [L] + +## Deployment + +Once you've set up your rewrite rules and defined which pages need caching, +you can build the static HTML files. This is done by the `RebuildStaticCacheTask`. + +Execution via URL: + + http://www.yoursite.com/dev/buildcache?flush=1 + +Execution on CLI (via [sake](framework/en/topics/commandline)): + + sake dev/buildcache flush=1 + +Depending on which extension you've set up for your SiteTree (FilesystemPublisher +or RsyncMultiHostPublisher), the method `publishPages()` either stores the generated +HTML files on the server's filesystem, or deploys them to other servers via rsync. + +It is advisable to set dev/buildcache up as an automated task (e.g. unix cron) +which continually rebuilds and redeploys the cache. + +## Related + +* [StaticExporter](StaticExporter) +* [Partial-Caching](/framework/partial-caching)