482 lines
16 KiB
PHP
Raw Normal View History

<?php
2013-05-10 21:59:20 +12:00
/**
 * A class with HTTP-related helpers.
 * Like Debug, this is more a bundle of static methods than a class ;-)
 *
 * @package framework
 * @subpackage misc
 */
class HTTP {
2013-05-10 21:59:20 +12:00
/**
 * Maximum age of the response in web caches, in seconds.
 * Written by set_cache_age() and read by add_cache_headers() and get_cache_age().
 *
 * @var int $cache_age
 */
protected static $cache_age = 0;
2013-05-10 21:59:20 +12:00
/**
 * Most recent modification time registered through register_modification_date() or
 * register_modification_timestamp(), as a Unix timestamp; null until one is registered.
 *
 * @var int|null $modification_date
 */
protected static $modification_date = null;
2013-05-10 21:59:20 +12:00
/**
 * ETag value set through register_etag(). When set, add_cache_headers() uses it
 * for the ETag response header in place of the one it generates itself.
 *
 * @var string $etag
 */
protected static $etag = null;
/**
 * Read by add_cache_headers(): when this is truthy and apache_request_headers() is available,
 * requests carrying an "X-Requested-With: XMLHttpRequest" header are given a cache age of 0.
 *
 * @config
 * @var bool
 */
private static $cache_ajax_requests = true;
/**
 * Turns a local system filename into a URL by comparing it to the script
 * filename.
 *
 * The longest prefix of $filename that ends at a "/" boundary and is also a prefix of
 * $_SERVER['SCRIPT_FILENAME'] is located. $_SERVER['PHP_SELF'] is then shortened by the
 * number of characters the script filename extends beyond that prefix, and the remainder
 * of $filename is appended to it.
 *
 * @param string $filename Filesystem path, using "/" as directory separator
 * @return string URL made of the request protocol, $_SERVER['HTTP_HOST'] and the derived path
 */
public static function filename2url($filename) {
	$scriptFilename = $_SERVER['SCRIPT_FILENAME'];

	// Initialised up front so it is defined even when $filename contains no "/" or the
	// very first segment already differs from the script filename.
	$commonLength = 0;
	$slashPos = -1;
	while(($slashPos = strpos($filename, "/", $slashPos + 1)) !== false) {
		// Strict comparison: path prefixes are plain strings and must never be
		// compared numerically.
		if(substr($filename, 0, $slashPos) !== substr($scriptFilename, 0, $slashPos)) {
			break;
		}
		$commonLength = $slashPos;
	}

	// Drop from PHP_SELF the trailing part of the script path that lies beyond the shared prefix
	$urlBase = substr(
		$_SERVER['PHP_SELF'],
		0,
		-(strlen($scriptFilename) - $commonLength)
	);
	$url = $urlBase . substr($filename, $commonLength);

	$protocol = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] != 'off') ? "https" : "http";

	return "$protocol://" . $_SERVER['HTTP_HOST'] . $url;
}
/**
 * Turn all relative URLs in the content to absolute URLs.
 *
 * Every literal '$CurrentPageURL' in the content is first replaced with
 * $_SERVER['REQUEST_URI']. The result is then run through urlRewriter(): URLs that
 * already begin with a "scheme:" prefix are kept as they are, all others are passed
 * to Director::absoluteURL().
 *
 * @param string $html The HTML to process
 * @return string The HTML with its URLs rewritten
 */
public static function absoluteURLs($html) {
	$html = str_replace('$CurrentPageURL', $_SERVER['REQUEST_URI'], $html);

	return self::urlRewriter($html, function($url) {
		// No need to rewrite if the URI has a protocol (determined here by the
		// existence of the reserved URI character ":" after a leading word)
		if(preg_match('/^\w+:/', $url)) {
			return $url;
		}
		return Director::absoluteURL($url, true);
	});
}
/**
 * Rewrite all the URLs in the given content.
 *
 * URLs are looked up in the src and background attributes of any tag, the href attribute
 * of a, link and base tags, and in url(...) values of the background-image, background,
 * list-style-image, list-style and content CSS properties.
 *
 * As of 3.2 $code should be a callable which takes a single parameter and returns
 * the rewritten URL. e.g.
 *
 * <code>
 * function($url) {
 *     return Director::absoluteURL($url, true);
 * }
 * </code>
 *
 * Deprecated usage: $code may be a string that is evaluated as a PHP expression.
 * Put $URL where you want the URL to appear, however, you can't embed $URL in strings.
 * Some example code:
 * <ul>
 * <li><code>'"../../" . $URL'</code></li>
 * <li><code>'myRewriter($URL)'</code></li>
 * <li><code>'(substr($URL, 0, 1)=="/") ? "../" . substr($URL, 1) : $URL'</code></li>
 * </ul>
 *
 * @param string $content The HTML to search for links to rewrite
 * @param string|callable $code Either a string that can evaluate to an expression
 *                              to rewrite links (deprecated), or a callable that takes a single
 *                              parameter and returns the rewritten URL
 * @return string The content with all links rewritten as per the logic specified in $code
 */
public static function urlRewriter($content, $code) {
	$isCallable = is_callable($code);
	if(!$isCallable) {
		Deprecation::notice('4.0', 'HTTP::urlRewriter expects a callable as the second parameter');
	}

	$regExps = array();

	// Replace attributes. Numeric keys mean "any tag"; string keys restrict the match to that tag.
	$attribs = array("src", "background", "a" => "href", "link" => "href", "base" => "href");
	foreach($attribs as $tag => $attrib) {
		if(!is_numeric($tag)) $tagPrefix = "$tag ";
		else $tagPrefix = "";

		// Double-quoted, single-quoted and unquoted (space-terminated) attribute values
		$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *\")([^\"]*)(\")/i";
		$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *')([^']*)(')/i";
		$regExps[] = "/(<{$tagPrefix}[^>]*$attrib *= *)([^\"' ]*)( )/i";
	}

	// Replace css styles
	// @todo - http://www.css3.info/preview/multiple-backgrounds/
	$styles = array('background-image', 'background', 'list-style-image', 'list-style', 'content');
	foreach($styles as $style) {
		$regExps[] = "/($style:[^;]*url *\(\")([^\"]+)(\"\))/i";
		$regExps[] = "/($style:[^;]*url *\(')([^']+)('\))/i";
		$regExps[] = "/($style:[^;]*url *\()([^\"\)')]+)(\))/i";
	}

	// Callback for regexp replacement: group 2 is the URL, groups 1 and 3 are kept verbatim
	$callback = function($matches) use($code, $isCallable) {
		if($isCallable) {
			// call_user_func() accepts every callable form (closures, function names,
			// array callables, "Class::method" strings)
			$rewritten = call_user_func($code, $matches[2]);
		} else {
			// Expose the $URL variable to be used by the $code expression.
			// SECURITY: $code is executed as PHP; it must never come from untrusted input.
			$URL = $matches[2];
			$rewritten = eval("return ($code);");
		}
		return $matches[1] . $rewritten . $matches[3];
	};

	// Execute each expression
	foreach($regExps as $regExp) {
		$replaced = preg_replace_callback($regExp, $callback, $content);
		// preg_replace_callback() returns null on a PCRE error (e.g. backtrack limit);
		// keep the content produced so far rather than discarding it.
		if($replaced !== null) {
			$content = $replaced;
		}
	}

	return $content;
}
/**
 * Will try to include a GET parameter for an existing URL,
 * preserving existing parameters and fragments.
 * If no URL is given, falls back to $_SERVER['REQUEST_URI'].
 * Uses parse_url() to dissect the URL, and http_build_query() to reconstruct it
 * with the additional parameter. With the default $separator, parameters are joined
 * with the XHTML compliant '&amp;' instead of '&'.
 *
 * If the URL is determined to be relative, it is prepended with
 * Director::absoluteBaseURL() for parsing, and the rebuilt URL is passed through
 * Director::makeRelative() before being returned. Otherwise the rebuilt absolute
 * URL is returned as is.
 *
 * @param string $varname
 * @param string $varvalue
 * @param string $currentURL Relative or absolute URL (Optional).
 * @param string $separator Separator for http_build_query(). (Optional).
 * @return string The URL including the new parameter
 * @throws InvalidArgumentException If the URL cannot be parsed by parse_url()
 */
public static function setGetVar($varname, $varvalue, $currentURL = null, $separator = '&amp;') {
	$uri = $currentURL ? $currentURL : Director::makeRelative($_SERVER['REQUEST_URI']);

	$isRelative = false;
	// We need absolute URLs for parse_url()
	if(Director::is_relative_url($uri)) {
		$uri = Director::absoluteBaseURL() . $uri;
		$isRelative = true;
	}

	// try to parse uri
	$parts = parse_url($uri);
	if(!$parts) {
		throw new InvalidArgumentException("Can't parse URL: " . $uri);
	}

	// Parse params and add new variable
	$params = array();
	if(isset($parts['query'])) parse_str($parts['query'], $params);
	$params[$varname] = $varvalue;

	// Generate URI segments and formatting
	$scheme = (isset($parts['scheme'])) ? $parts['scheme'] : 'http';
	$user = (isset($parts['user']) && $parts['user'] != '') ? $parts['user'] : '';

	if($user != '') {
		// format in either user:pass@host.com or user@host.com
		$user .= (isset($parts['pass']) && $parts['pass'] != '') ? ':' . $parts['pass'] . '@' : '@';
	}

	$host = (isset($parts['host'])) ? $parts['host'] : '';
	$port = (isset($parts['port']) && $parts['port'] != '') ? ':'.$parts['port'] : '';
	$path = (isset($parts['path']) && $parts['path'] != '') ? $parts['path'] : '';

	// handle URL params which are existing / new.
	// The numeric prefix is passed as '' rather than null: it is a string parameter.
	$params = ($params) ? '?' . http_build_query($params, '', $separator) : '';

	// keep fragments (anchors) intact.
	$fragment = (isset($parts['fragment']) && $parts['fragment'] != '') ? '#'.$parts['fragment'] : '';

	// Recompile URI segments
	$newUri = $scheme . '://' . $user . $host . $port . $path . $params . $fragment;

	if($isRelative) return Director::makeRelative($newUri);

	return $newUri;
}
/**
 * Same as setGetVar() called with its default separator, with the resulting URL
 * passed through Convert::xml2raw() before it is returned.
 *
 * @param string $varname
 * @param string $varvalue
 * @param string $currentURL Relative or absolute URL (Optional).
 * @return string
 */
public static function RAW_setGetVar($varname, $varvalue, $currentURL = null) {
	return Convert::xml2raw(self::setGetVar($varname, $varvalue, $currentURL));
}
2014-08-15 18:53:05 +12:00
/**
 * Search for all tags with a specific attribute, then return the value of that attribute in a flat array.
 *
 * For each tag, quoted attribute values are collected first, then unquoted ones, so the
 * result is not necessarily in document order.
 *
 * @param string $content
 * @param array $attributes an array of tags to attributes, for example "[a] => 'href', [div] => 'id'".
 *                          Tag and attribute names are inserted into the regular expressions as given.
 * @return array|null List of attribute values, or null when nothing was found
 */
public static function findByTagAndAttribute($content, $attributes) {
	$result = array();

	foreach($attributes as $tag => $attribute) {
		$regexes = array(
			// Quoted value: group 1 is the quote character, group 2 the value
			"/<{$tag} [^>]*$attribute *= *([\"'])(.*?)\\1[^>]*>/i",
			// Unquoted value: group 1 is the value
			"/<{$tag} [^>]*$attribute *= *([^ \"'>]+)/i",
		);

		foreach($regexes as $regex) {
			if(preg_match_all($regex, $content, $matches)) {
				$values = isset($matches[2]) ? $matches[2] : $matches[1];
				// Both operands are flat lists, so a plain merge is all that is needed
				$result = array_merge($result, $values);
			}
		}
	}

	return count($result) ? $result : null;
}
2014-08-15 18:53:05 +12:00
/**
 * Collect the href attribute values of the <a> tags in the given content.
 *
 * @param string $content
 * @return array|null Result of findByTagAndAttribute(): the values found, or null if there are none
 */
public static function getLinksIn($content) {
	$anchorHrefs = array("a" => "href");

	return self::findByTagAndAttribute($content, $anchorHrefs);
}
2014-08-15 18:53:05 +12:00
/**
 * Collect the src attribute values of the <img> tags in the given content.
 *
 * @param string $content
 * @return array|null Result of findByTagAndAttribute(): the values found, or null if there are none
 */
public static function getImagesIn($content) {
	$imageSources = array("img" => "src");

	return self::findByTagAndAttribute($content, $imageSources);
}
2014-08-15 18:53:05 +12:00
/**
 * Determine the MIME type of a file.
 *
 * When the finfo class is available and the file exists below BASE_PATH, finfo is asked
 * for the type. Otherwise the file extension is looked up in the 'MimeTypes' configuration
 * of HTTP, with 'application/unknown' as the answer for extensions that are not listed.
 *
 * @uses finfo
 * @param string $filename Relative path to filename from project root, e.g. "mysite/tests/file.csv"
 * @return string MIME type
 */
public static function get_mime_type($filename) {
	$absolutePath = BASE_PATH . DIRECTORY_SEPARATOR . $filename;

	// Preferred route: let finfo inspect the actual file
	$canInspectFile = class_exists('finfo') && file_exists($absolutePath);
	if($canInspectFile) {
		$inspector = new finfo(FILEINFO_MIME_TYPE);
		return $inspector->file($absolutePath);
	}

	// Fallback route: map the file extension through the configured list
	$extension = File::get_file_extension($filename);
	$knownTypes = Config::inst()->get('HTTP', 'MimeTypes');

	return isset($knownTypes[$extension]) ? $knownTypes[$extension] : 'application/unknown';
}
/**
 * Set the maximum age of this page in web caches, in seconds.
 * The value is stored in the static $cache_age and picked up by add_cache_headers().
 *
 * @param int $age Cache lifetime in seconds
 */
public static function set_cache_age($age) {
self::$cache_age = $age;
}
/**
 * Register a modification date given as a date string.
 *
 * The string is converted with strtotime() and stored as the modification date
 * only if it is later than the one currently held.
 *
 * @param string $dateString Any date/time string understood by strtotime()
 */
public static function register_modification_date($dateString) {
	// Same "keep the latest" comparison as the timestamp variant, so delegate to it
	self::register_modification_timestamp(strtotime($dateString));
}
/**
 * Register a modification date given as a timestamp.
 *
 * The value is stored as the modification date only if it is greater than
 * the one currently held.
 *
 * @param int $timestamp
 */
public static function register_modification_timestamp($timestamp) {
	$isMoreRecent = $timestamp > self::$modification_date;

	if($isMoreRecent) {
		self::$modification_date = $timestamp;
	}
}
/**
 * Store the ETag value that add_cache_headers() will send as the ETag response header.
 *
 * @param string $etag
 */
public static function register_etag($etag) {
self::$etag = $etag;
}
/**
 * Add the appropriate caching headers to the response, including If-Modified-Since / 304 handling.
 * Note that setting HTTP::$cache_age will overrule any cache headers set by PHP's
 * session_cache_limiter functionality. It is your responsibility to ensure only cacheable data
 * is in fact cached, and HTTP::$cache_age isn't set when the HTTP body contains session-specific content.
 *
 * @param SS_HTTPResponse|null $body The SS_HTTPResponse object to augment. Omitting the argument or
 *                                   passing a string is deprecated; in these cases, the headers are
 *                                   output directly. Note that on a 304 match without a response
 *                                   object, the status line is sent and the script is terminated.
 */
public static function add_cache_headers($body = null) {
	$cacheAge = self::$cache_age;

	// Validate argument
	if($body && !($body instanceof SS_HTTPResponse)) {
		user_error("HTTP::add_cache_headers() must be passed an SS_HTTPResponse object", E_USER_WARNING);
		$body = null;
	}

	// Development sites have frequently changing templates; this can get stuffed up by the code
	// below.
	if(Director::isDev()) $cacheAge = 0;

	// The headers have been sent and we don't have an SS_HTTPResponse object to attach things to; no point in
	// us trying.
	if(headers_sent() && !$body) return;

	// Populate $responseHeaders with all the headers that we want to build
	$responseHeaders = array();

	$config = Config::inst();
	$cacheControlHeaders = $config->get('HTTP', 'cache_control');
	// Directives are added and iterated below, so make sure we really work on an array
	if(!is_array($cacheControlHeaders)) {
		$cacheControlHeaders = array();
	}

	// currently using a config setting to cancel this, seems to be so that the CMS caches ajax requests
	if(function_exists('apache_request_headers') && $config->get(get_called_class(), 'cache_ajax_requests')) {
		$requestHeaders = array_change_key_case(apache_request_headers(), CASE_LOWER);
		if(isset($requestHeaders['x-requested-with']) && $requestHeaders['x-requested-with']=='XMLHttpRequest') {
			$cacheAge = 0;
		}
	}

	if($cacheAge > 0) {
		$cacheControlHeaders['max-age'] = self::$cache_age;

		// Set empty pragma to avoid PHP's session_cache_limiter adding conflicting caching information,
		// defaulting to "nocache" on most PHP configurations (see http://php.net/session_cache_limiter).
		// Since it's a deprecated HTTP 1.0 option, all modern HTTP clients and proxies should
		// prefer the caching information indicated through the "Cache-Control" header.
		$responseHeaders["Pragma"] = "";

		// To do: User-Agent should only be added in situations where you *are* actually
		// varying according to user-agent.
		$responseHeaders['Vary'] = 'Cookie, X-Forwarded-Protocol, User-Agent, Accept';
	} else {
		$contentDisposition = null;
		if($body) {
			// Grab header for checking. Unfortunately HTTPRequest uses a mistyped variant.
			$contentDisposition = $body->getHeader('Content-disposition');
			if (!$contentDisposition) $contentDisposition = $body->getHeader('Content-Disposition');
		}

		if(
			$body &&
			Director::is_https() &&
			isset($_SERVER['HTTP_USER_AGENT']) &&
			strpos($_SERVER['HTTP_USER_AGENT'], 'MSIE') !== false &&
			// The header may be absent, hence the cast before the string search
			strpos((string)$contentDisposition, 'attachment;') !== false
		) {
			// IE6-IE8 have problems saving files when https and no-cache are used
			// (http://support.microsoft.com/kb/323308)
			// Note: this is also fixable by ticking "Do not save encrypted pages to disk" in advanced options.
			$cacheControlHeaders['max-age'] = 3;

			// Set empty pragma to avoid PHP's session_cache_limiter adding conflicting caching information,
			// defaulting to "nocache" on most PHP configurations (see http://php.net/session_cache_limiter).
			// Since it's a deprecated HTTP 1.0 option, all modern HTTP clients and proxies should
			// prefer the caching information indicated through the "Cache-Control" header.
			$responseHeaders["Pragma"] = "";
		} else {
			$cacheControlHeaders['no-cache'] = "true";
		}
	}

	// Turn the directive map into "name" / "name=value" strings; null entries are dropped
	foreach($cacheControlHeaders as $header => $value) {
		if(is_null($value)) {
			unset($cacheControlHeaders[$header]);
		} elseif((is_bool($value) && $value) || $value === "true") {
			$cacheControlHeaders[$header] = $header;
		} else {
			$cacheControlHeaders[$header] = $header."=".$value;
		}
	}

	$responseHeaders['Cache-Control'] = implode(', ', $cacheControlHeaders);
	unset($cacheControlHeaders, $header, $value);

	if(self::$modification_date && $cacheAge > 0) {
		$responseHeaders["Last-Modified"] = self::gmt_date(self::$modification_date);

		// Chrome ignores Varies when redirecting back (http://code.google.com/p/chromium/issues/detail?id=79758)
		// which means that if you log out, you get redirected back to a page which Chrome then checks against
		// last-modified (which passes, getting a 304)
		// when it shouldn't be trying to use that page at all because it's the "logged in" version.
		// By also using an etag that includes both the modification date and all the varies
		// values which we also check against we can catch this and not return a 304
		$etagParts = array(self::$modification_date, serialize($_COOKIE));
		$etagParts[] = Director::is_https() ? 'https' : 'http';
		if (isset($_SERVER['HTTP_USER_AGENT'])) $etagParts[] = $_SERVER['HTTP_USER_AGENT'];
		if (isset($_SERVER['HTTP_ACCEPT'])) $etagParts[] = $_SERVER['HTTP_ACCEPT'];

		$etag = sha1(implode(':', $etagParts));
		$responseHeaders["ETag"] = $etag;

		// 304 response detection
		if(isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) {
			$ifModifiedSince = strtotime(stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']));

			// As above, only 304 if the last request had all the same varies values
			// (or the etag isn't passed as part of the request - but with chrome it always is)
			$matchesEtag = !isset($_SERVER['HTTP_IF_NONE_MATCH']) || $_SERVER['HTTP_IF_NONE_MATCH'] == $etag;

			if($ifModifiedSince >= self::$modification_date && $matchesEtag) {
				if($body) {
					$body->setStatusCode(304);
					$body->setBody('');
				} else {
					header('HTTP/1.0 304 Not Modified');
					die();
				}
			}
		}

		$expires = time() + $cacheAge;
		$responseHeaders["Expires"] = self::gmt_date($expires);
	}

	// An explicitly registered ETag takes precedence over the generated one
	if(self::$etag) {
		$responseHeaders['ETag'] = self::$etag;
	}

	// Now that we've generated them, either output them or attach them to the SS_HTTPResponse as appropriate
	foreach($responseHeaders as $k => $v) {
		if($body) {
			// Set the header now if it's not already set.
			if ($body->getHeader($k) === null) {
				$body->addHeader($k, $v);
			}
		} elseif(!headers_sent()) {
			header("$k: $v");
		}
	}
}
/**
 * Format a timestamp as a GMT date string of the form "Thu, 01 Jan 1970 00:00:00 GMT",
 * as used by add_cache_headers() for the Last-Modified and Expires headers.
 * (A timestamp is always in GMT: the number of seconds since January 1 1970 00:00:00 GMT.)
 *
 * @param int $timestamp
 * @return string
 */
public static function gmt_date($timestamp) {
	$formatted = gmdate('D, d M Y H:i:s', $timestamp);

	return sprintf('%s GMT', $formatted);
}
2014-08-15 18:53:05 +12:00
/**
 * Return the static variable $cache_age, in seconds.
 *
 * @return int
 */
public static function get_cache_age() {
return self::$cache_age;
}
}