2012-09-21 07:47:26 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
|
2012-09-25 10:02:26 +02:00
|
|
|
* @package staticpublisher
|
2012-09-21 07:47:26 +02:00
|
|
|
*/
|
|
|
|
class FilesystemPublisher extends StaticPublisher {
|
|
|
|
|
|
|
|
/**
 * Folder the static cache is written to. {@link getDestDir()} resolves it
 * against BASE_PATH.
 *
 * @var string
 */
protected $destFolder = 'cache';

/**
 * Extension appended to every generated cache file name.
 *
 * @var string
 */
protected $fileExtension = 'html';

/**
 * Alternative base URL that is applied while pages are rendered for the
 * static copy. While this is null no alternative base URL is applied.
 *
 * @config
 *
 * @var string|null
 */
private static $static_base_url = null;

/**
 * Use domain based caching (put cache files into a domain subfolder).
 * This must be true if you are using this with the "subsites" module.
 * Please note that this form of caching requires all URLs to be provided absolute
 * (not relative to the webroot) via {@link SiteTree->AbsoluteLink()}.
 *
 * @config
 *
 * @var bool
 */
private static $domain_based_caching = false;
|
2012-09-21 07:47:26 +02:00
|
|
|
|
|
|
|
/**
 * Override the base URL used for the static copy of the site, e.g. when the
 * CMS runs on a different domain than the published website.
 *
 * Raises a deprecation notice and then stores the value in the
 * "FilesystemPublisher.static_base_url" config setting.
 *
 * @deprecated 3.2 Use the "FilesystemPublisher.static_base_url" config setting instead
 *
 * @param string $url Base URL to store in the config setting.
 */
public static function set_static_base_url($url) {
	$hint = 'Use the "FilesystemPublisher.static_base_url" config setting instead';
	Deprecation::notice('3.2', $hint);

	$config = Config::inst();
	$config->update('FilesystemPublisher', 'static_base_url', $url);
}
|
|
|
|
|
|
|
|
/**
 * @param string $destFolder The folder to save the cached site into. A single
 *   trailing slash is removed before the value is stored. This needs to be set
 *   in framework/static-main.php as well through the {@link $cacheBaseDir} variable.
 * @param string|null $fileExtension The file extension to use, e.g. 'html'.
 *   A falsy value (the default) keeps the extension declared on the class.
 *   With 'php', {@link publishPages()} generates small PHP scripts from
 *   templates instead of plain HTML files.
 */
public function __construct($destFolder = 'cache', $fileExtension = null) {
	// Strip one trailing slash; paths are later joined with an explicit '/'.
	$endsWithSlash = (substr($destFolder, -1) === '/');
	$this->destFolder = $endsWithSlash ? substr($destFolder, 0, -1) : $destFolder;

	// Only override the declared default when an extension was actually passed.
	if($fileExtension) $this->fileExtension = $fileExtension;

	parent::__construct();
}
|
|
|
|
|
|
|
|
/**
 * Transforms relative or absolute URLs to their static path equivalent.
 * This needs to be the same logic that's used to look up these paths through
 * framework/static-main.php. Does not include the {@link $destFolder} prefix.
 *
 * URL filtering will have already taken place for direct SiteTree links via
 * SiteTree->generateURLSegment(). For all other links (e.g. custom controller
 * actions), we assume that they're pre-sanitized to suit the filesystem needs,
 * as it's impossible to sanitize them without risking to break the underlying
 * naming assumptions in URL routing (e.g. controller method names).
 *
 * Examples (without $domain_based_caching, default 'html' extension):
 *  - http://mysite.com/mywebroot/ => ./index.html (assuming BASE_URL is /mywebroot)
 *  - http://mysite.com/about-us => ./about-us.html
 *  - http://mysite.com/parent/child => parent/child.html
 *
 * Examples (with $domain_based_caching):
 *  - http://mysite.com/mywebroot/ => mysite.com/index.html (assuming BASE_URL is /mywebroot)
 *  - http://mysite.com/about-us => mysite.com/about-us.html
 *  - http://myothersite.com/about-us => myothersite.com/about-us.html
 *  - http://subdomain.mysite.com/parent/child => subdomain.mysite.com/parent/child.html
 *
 * With $domain_based_caching enabled, URLs that parse_url() rejects are left
 * out of the result. URLs without a host component are mapped with an empty
 * host prefix instead of raising an undefined index notice.
 *
 * @param array $urls Absolute or relative URLs
 * @return array Map of original URLs to filesystem paths (relative to {@link $destFolder}).
 */
public function urlsToPaths($urls) {
	$mappedUrls = array();

	// None of these change between iterations, so resolve them once.
	$domainBased = Config::inst()->get('FilesystemPublisher', 'domain_based_caching');
	$baseUrlLower = mb_strtolower(BASE_URL);
	$baseUrlLength = mb_strlen(BASE_URL);

	foreach($urls as $url) {
		// parse_url() is not multibyte safe, see https://bugs.php.net/bug.php?id=52923.
		// We assume that the URL has been correctly encoded either on storage (for SiteTree->URLSegment),
		// or through URL collection (for controller method names etc.).
		$urlParts = @parse_url($url);

		// Remove base folders from the URL if webroot is hosted in a subfolder (same as static-main.php)
		$path = isset($urlParts['path']) ? $urlParts['path'] : '';
		if(mb_substr(mb_strtolower($path), 0, $baseUrlLength) == $baseUrlLower) {
			$urlSegment = mb_substr($path, $baseUrlLength);
		} else {
			$urlSegment = $path;
		}

		// Normalize URLs
		$urlSegment = trim($urlSegment, '/');

		$filename = $urlSegment ? "$urlSegment.$this->fileExtension" : "index.$this->fileExtension";

		if($domainBased) {
			if(!$urlParts) continue; // seriously malformed url here...

			// Relative URLs carry no host; fall back to an empty prefix rather than
			// reading a missing array key. The resulting path is unchanged.
			$host = isset($urlParts['host']) ? $urlParts['host'] : '';
			$filename = $host . '/' . $filename;
		}

		// A bare '/' directory (file directly below an empty host prefix) is dropped.
		$directory = dirname($filename);
		$mappedUrls[$url] = (($directory == '/') ? '' : ($directory . '/')) . basename($filename);
	}

	return $mappedUrls;
}
|
|
|
|
|
2013-04-06 06:24:51 +02:00
|
|
|
/**
 * Delete the cache files that belong to the given URLs. Files that do not
 * exist are skipped; failures while deleting are suppressed.
 *
 * @param array $urls Either a numerically indexed list of URLs (mapped through
 *   {@link urlsToPaths()} first) or a map of URL => cache path relative to
 *   the destination folder.
 */
public function unpublishPages($urls) {
	// Purely numeric keys mean we were handed a list of URLs rather than
	// a ready-made URL => path map, so the paths still have to be derived.
	$joinedKeys = implode('', array_keys($urls));
	if(is_numeric($joinedKeys)) {
		$urls = $this->urlsToPaths($urls);
	}

	// This can be quite memory hungry and time-consuming
	// @todo - Make a more memory efficient publisher
	increase_time_limit_to();
	increase_memory_limit_to();

	$root = $this->getDestDir();

	foreach($urls as $relativePath) {
		$target = $root . '/' . $relativePath;
		if(!file_exists($target)) continue;

		@unlink($target);
	}
}
|
|
|
|
|
2013-04-06 06:24:51 +02:00
|
|
|
/**
 * Uses {@link Director::test()} to perform in-memory HTTP requests
 * on the passed-in URLs and writes the rendered responses into the
 * destination folder.
 *
 * Responses with a 404 status code (and empty responses) are not cached.
 * 301/302 responses are stored as a redirect stub: a generated PHP redirect
 * script when the file extension is 'php', a meta-refresh tag otherwise.
 *
 * While rendering, the theme, base URL and hash link rewriting config is
 * adjusted; the base URL and hash link settings are reset once all pages
 * have been requested.
 *
 * @param array $urls Relative URLs, either as a numerically indexed list
 *   (mapped through {@link urlsToPaths()} first) or as a map of
 *   URL => cache path relative to the destination folder.
 * @return array Result, keyed by URL. Keys:
 *               - "statuscode": The HTTP status code
 *               - "redirect": A redirect location (if applicable)
 *               - "path": The filesystem path where the cache has been written,
 *                 or null if the page was skipped or writing the file failed
 */
public function publishPages($urls) {
	$result = array();

	// Do we need to map these?
	// Detect a numerically indexed arrays
	if (is_numeric(join('', array_keys($urls)))) $urls = $this->urlsToPaths($urls);

	// This can be quite memory hungry and time-consuming
	// @todo - Make a more memory efficient publisher
	increase_time_limit_to();
	increase_memory_limit_to();

	// Set the appropriate theme for this publication batch.
	// This may have been set explicitly via StaticPublisher::static_publisher_theme,
	// or we can use the last non-null theme.
	$customTheme = Config::inst()->get('StaticPublisher', 'static_publisher_theme');

	if($customTheme) {
		Config::inst()->update('SSViewer', 'theme', $customTheme);
	}

	// Ensure that the theme that is set gets used.
	Config::inst()->update('SSViewer', 'theme_enabled', true);

	$currentBaseURL = Director::baseURL();
	$staticBaseUrl = Config::inst()->get('FilesystemPublisher', 'static_base_url');

	if($staticBaseUrl) {
		Config::inst()->update('Director', 'alternate_base_url', $staticBaseUrl);
	}

	if($this->fileExtension == 'php') {
		Config::inst()->update('SSViewer', 'rewrite_hash_links', 'php');
	}

	if(Config::inst()->get('StaticPublisher', 'echo_progress')) {
		echo $this->class.": Publishing to " . $staticBaseUrl . "\n";
	}

	$files = array();
	$i = 0;
	$totalURLs = count($urls);

	foreach($urls as $url => $path) {
		$origUrl = $url;
		$result[$origUrl] = array(
			'statuscode' => null,
			'redirect' => null,
			'path' => null
		);

		// Re-applied for every page.
		// NOTE(review): presumably a request run through Director::test() can
		// reset this setting - confirm before removing.
		if($staticBaseUrl) {
			Config::inst()->update('Director', 'alternate_base_url', $staticBaseUrl);
		}

		$i++;

		if($url && !is_string($url)) {
			user_error("Bad url:" . var_export($url,true), E_USER_WARNING);
			continue;
		}

		if(Config::inst()->get('StaticPublisher', 'echo_progress')) {
			echo " * Publishing page $i/$totalURLs: $url\n";
			flush();
		}

		Requirements::clear();

		if($url == "") $url = "/";
		if(Director::is_relative_url($url)) $url = Director::absoluteURL($url);
		$response = Director::test(str_replace('+', ' ', $url));

		// Only ask response objects for a status code, so that the plain string
		// fallback further down is actually reachable.
		$isResponseObject = is_object($response);
		$statusCode = $isResponseObject ? $response->getStatusCode() : null;

		if($response) {
			$result[$origUrl]['statuscode'] = $statusCode;
		}

		Requirements::clear();

		singleton('DataObject')->flushCache();

		// Skip any responses with a 404 status code. We don't want to turn those into statically cached pages
		if(!$response || $statusCode == '404') continue;

		$isRedirect = ($statusCode == '301' || $statusCode == '302');

		// Generate file content
		if($isRedirect) {
			$location = $response->getHeader('Location');

			// Record the redirect target regardless of the cache file type.
			$result[$origUrl]['redirect'] = $location;

			if($this->fileExtension == 'php') {
				// PHP file caching will generate a simple script from a template
				$content = $this->generatePHPCacheRedirection($location);
			} else {
				// The URL ends up inside an HTML attribute, so it has to be escaped.
				$absoluteURL = Director::absoluteURL($location);
				$content = '<meta http-equiv="refresh" content="2; URL='
					. htmlspecialchars($absoluteURL, ENT_QUOTES, 'UTF-8')
					. '">';
			}
		} else {
			$body = $isResponseObject ? $response->getBody() : $response . '';

			if($this->fileExtension == 'php') {
				// PHP file caching will generate a simple script from a template
				$content = $this->generatePHPCacheFile($body, HTTP::get_cache_age(), date('Y-m-d H:i:s'));
			} else {
				// HTML file caching generally just creates a simple file
				$content = $body;
			}
		}

		if(Config::inst()->get('StaticPublisher', 'include_caching_metadata')) {
			$content = str_replace(
				'</html>',
				sprintf("</html>\n\n<!-- %s -->", implode(" ", $this->getMetadata($url))),
				$content
			);
		}

		$files[$origUrl] = array(
			'Content' => $content,
			'Folder' => dirname($path).'/',
			'Filename' => basename($path),
		);
	}

	if(Config::inst()->get('FilesystemPublisher', 'static_base_url')) {
		Config::inst()->update('Director', 'alternate_base_url', $currentBaseURL);
	}

	if($this->fileExtension == 'php') {
		Config::inst()->update('SSViewer', 'rewrite_hash_links', true);
	}

	$base = $this->getDestDir();

	foreach($files as $origUrl => $file) {
		Filesystem::makeFolder("$base/$file[Folder]");

		$path = "$base/$file[Folder]$file[Filename]";

		// Stays true when there is nothing to write, so the path is still reported.
		$written = true;

		if(isset($file['Content'])) {
			// file_put_contents() raises its own warning when the file cannot be written.
			$written = (file_put_contents($path, $file['Content']) !== false);
		} else if(isset($file['Copy'])) {
			// 'Copy' entries are not produced by this method at present, but are still honoured.
			$written = copy($file['Copy'], $path);
		}

		// Do not report a path for a file that could not be written.
		$result[$origUrl]['path'] = $written ? $path : null;
	}

	return $result;
}
|
|
|
|
|
|
|
|
/**
 * Build the source of a cached PHP page by filling the placeholders of the
 * module's code/CachedPHPPage.tmpl template.
 *
 * @param string $content Inserted for the **CONTENT** placeholder.
 * @param string $age Cast to an integer and inserted for **MAX_AGE**.
 * @param string $lastModified Inserted for **LAST_MODIFIED**.
 *
 * @return string The template with all three placeholders substituted.
 */
protected function generatePHPCacheFile($content, $age, $lastModified) {
	$templatePath = STATIC_MODULE_DIR . '/code/CachedPHPPage.tmpl';
	$source = file_get_contents($templatePath);

	// The content goes last so that placeholder-like text inside it is left alone.
	$placeholders = array('**MAX_AGE**', '**LAST_MODIFIED**', '**CONTENT**');
	$values = array((int)$age, $lastModified, $content);

	return str_replace($placeholders, $values, $source);
}
|
|
|
|
|
|
|
|
/**
 * Build the source of a PHP script that redirects to the given destination,
 * based on the module's code/CachedPHPRedirection.tmpl template.
 *
 * @param string $destination Inserted for the **DESTINATION** placeholder.
 *
 * @return string The template with the placeholder substituted.
 */
protected function generatePHPCacheRedirection($destination) {
	$templatePath = STATIC_MODULE_DIR . '/code/CachedPHPRedirection.tmpl';
	$source = file_get_contents($templatePath);

	$placeholders = array('**DESTINATION**');
	$values = array($destination);

	return str_replace($placeholders, $values, $source);
}
|
|
|
|
|
|
|
|
/**
 * Absolute path of the cache folder: the configured destination folder
 * appended to BASE_PATH.
 *
 * @return string
 */
public function getDestDir() {
	$folder = $this->destFolder;

	return BASE_PATH . '/' . $folder;
}
|
|
|
|
|
|
|
|
/**
 * Return an array of all the existing static cache files, as a map of
 * URL => file. Only returns cache files that will actually map to a URL,
 * based on urlsToPaths.
 *
 * Only files located exactly one directory level below the cache folder
 * are inspected. Paths are compared relative to the configured destination
 * folder (see {@link getDestDir()}), not to a fixed "cache" folder.
 *
 * @return array Map of URL => absolute path of the cache file.
 */
public function getExistingStaticCacheFiles() {
	$cacheRoot = $this->getDestDir();

	// Every glob() hit starts with this prefix; what follows it is the relative
	// path in the format produced by urlsToPaths().
	$prefixLength = strlen($cacheRoot . '/');

	$urlMapper = array_flip($this->urlsToPaths($this->owner->allPagesToCache()));

	$output = array();

	// Glob each dir, then glob each one of those.
	// glob() may return false on error, so never iterate its result unchecked.
	$subDirs = glob($cacheRoot . '/*', GLOB_ONLYDIR);
	if(!$subDirs) return $output;

	foreach($subDirs as $subDir) {
		$cacheFiles = glob($subDir . '/*');
		if(!$cacheFiles) continue;

		foreach($cacheFiles as $cacheFile) {
			$mapKey = substr($cacheFile, $prefixLength);
			if(isset($urlMapper[$mapKey])) {
				$url = $urlMapper[$mapKey];
				$output[$url] = $cacheFile;
			}
		}
	}

	return $output;
}
|
2013-04-06 06:24:51 +02:00
|
|
|
}
|