Sam Minnee cca7e9697c FIX: Correct PHP4-style constructors in SimpleTest.
Note that the best solution to this will be to remove the use of
SimpleTest entirely. This is quick fix is intended to help us get PHP7
tests running without needing to cross that bridge.
2017-04-05 11:05:27 +10:00

532 lines
16 KiB
PHP

<?php
/**
* base include file for SimpleTest
* @package SimpleTest
* @subpackage WebTester
* @version $Id: url.php 1723 2008-04-08 00:34:10Z lastcraft $
*/
/**#@+
* include other SimpleTest class files
*/
require_once(dirname(__FILE__) . '/encoding.php');
/**#@-*/
/**
* URL parser to replace parse_url() PHP function which
* got broken in PHP 4.3.0. Adds some browser specific
* functionality such as expandomatics.
* Guesses a bit trying to separate the host from
* the path and tries to keep a raw, possibly unparsable,
* request string as long as possible.
* @package SimpleTest
* @subpackage WebTester
*/
class SimpleUrl {
var $_scheme;
var $_username;
var $_password;
var $_host;
var $_port;
var $_path;
var $_request;
var $_fragment;
var $_x;
var $_y;
var $_target;
var $_raw = false;
/**
* Constructor. Parses URL into sections.
* @param string $url Incoming URL.
* @access public
*/
function __construct($url = '') {
list($x, $y) = $this->_chompCoordinates($url);
$this->setCoordinates($x, $y);
$this->_scheme = $this->_chompScheme($url);
list($this->_username, $this->_password) = $this->_chompLogin($url);
$this->_host = $this->_chompHost($url);
$this->_port = false;
if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
$this->_host = $host_parts[1];
$this->_port = (integer)$host_parts[2];
}
$this->_path = $this->_chompPath($url);
$this->_request = $this->_parseRequest($this->_chompRequest($url));
$this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
$this->_target = false;
}
/**
* Extracts the X, Y coordinate pair from an image map.
* @param string $url URL so far. The coordinates will be
* removed.
* @return array X, Y as a pair of integers.
* @access private
*/
function _chompCoordinates(&$url) {
if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
$url = $matches[1];
return array((integer)$matches[2], (integer)$matches[3]);
}
return array(false, false);
}
/**
* Extracts the scheme part of an incoming URL.
* @param string $url URL so far. The scheme will be
* removed.
* @return string Scheme part or false.
* @access private
*/
function _chompScheme(&$url) {
if (preg_match('/^([^\/:]*):(\/\/)(.*)/', $url, $matches)) {
$url = $matches[2] . $matches[3];
return $matches[1];
}
return false;
}
/**
* Extracts the username and password from the
* incoming URL. The // prefix will be reattached
* to the URL after the doublet is extracted.
* @param string $url URL so far. The username and
* password are removed.
* @return array Two item list of username and
* password. Will urldecode() them.
* @access private
*/
function _chompLogin(&$url) {
$prefix = '';
if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
$prefix = $matches[1];
$url = $matches[2];
}
if (preg_match('/^([^\/]*)@(.*)/', $url, $matches)) {
$url = $prefix . $matches[2];
$parts = preg_split('/:/', $matches[1]);
return array(
urldecode($parts[0]),
isset($parts[1]) ? urldecode($parts[1]) : false);
}
$url = $prefix . $url;
return array(false, false);
}
/**
* Extracts the host part of an incoming URL.
* Includes the port number part. Will extract
* the host if it starts with // or it has
* a top level domain or it has at least two
* dots.
* @param string $url URL so far. The host will be
* removed.
* @return string Host part guess or false.
* @access private
*/
function _chompHost(&$url) {
if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
$url = $matches[3];
return $matches[2];
}
if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
$tlds = SimpleUrl::getAllTopLevelDomains();
if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
$url = $matches[2] . $matches[3];
return $matches[1];
} elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
$url = $matches[2] . $matches[3];
return $matches[1];
}
}
return false;
}
/**
* Extracts the path information from the incoming
* URL. Strips this path from the URL.
* @param string $url URL so far. The host will be
* removed.
* @return string Path part or '/'.
* @access private
*/
function _chompPath(&$url) {
if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
$url = $matches[2] . $matches[3];
return ($matches[1] ? $matches[1] : '');
}
return '';
}
/**
* Strips off the request data.
* @param string $url URL so far. The request will be
* removed.
* @return string Raw request part.
* @access private
*/
function _chompRequest(&$url) {
if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
$url = $matches[2] . $matches[3];
return $matches[1];
}
return '';
}
/**
* Breaks the request down into an object.
* @param string $raw Raw request.
* @return SimpleFormEncoding Parsed data.
* @access private
*/
function _parseRequest($raw) {
$this->_raw = $raw;
$request = new SimpleGetEncoding();
foreach (preg_split('/&/', $raw) as $pair) {
if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
$request->add($matches[1], urldecode($matches[2]));
} elseif ($pair) {
$request->add($pair, '');
}
}
return $request;
}
/**
* Accessor for protocol part.
* @param string $default Value to use if not present.
* @return string Scheme name, e.g "http".
* @access public
*/
function getScheme($default = false) {
return $this->_scheme ? $this->_scheme : $default;
}
/**
* Accessor for user name.
* @return string Username preceding host.
* @access public
*/
function getUsername() {
return $this->_username;
}
/**
* Accessor for password.
* @return string Password preceding host.
* @access public
*/
function getPassword() {
return $this->_password;
}
/**
* Accessor for hostname and port.
* @param string $default Value to use if not present.
* @return string Hostname only.
* @access public
*/
function getHost($default = false) {
return $this->_host ? $this->_host : $default;
}
/**
* Accessor for top level domain.
* @return string Last part of host.
* @access public
*/
function getTld() {
$path_parts = pathinfo($this->getHost());
return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
}
/**
* Accessor for port number.
* @return integer TCP/IP port number.
* @access public
*/
function getPort() {
return $this->_port;
}
/**
* Accessor for path.
* @return string Full path including leading slash if implied.
* @access public
*/
function getPath() {
if (! $this->_path && $this->_host) {
return '/';
}
return $this->_path;
}
/**
* Accessor for page if any. This may be a
* directory name if ambiguious.
* @return Page name.
* @access public
*/
function getPage() {
if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
return false;
}
return $matches[1];
}
/**
* Gets the path to the page.
* @return string Path less the page.
* @access public
*/
function getBasePath() {
if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
return false;
}
return $matches[1];
}
/**
* Accessor for fragment at end of URL after the "#".
* @return string Part after "#".
* @access public
*/
function getFragment() {
return $this->_fragment;
}
/**
* Sets image coordinates. Set to false to clear
* them.
* @param integer $x Horizontal position.
* @param integer $y Vertical position.
* @access public
*/
function setCoordinates($x = false, $y = false) {
if (($x === false) || ($y === false)) {
$this->_x = $this->_y = false;
return;
}
$this->_x = (integer)$x;
$this->_y = (integer)$y;
}
/**
* Accessor for horizontal image coordinate.
* @return integer X value.
* @access public
*/
function getX() {
return $this->_x;
}
/**
* Accessor for vertical image coordinate.
* @return integer Y value.
* @access public
*/
function getY() {
return $this->_y;
}
/**
* Accessor for current request parameters
* in URL string form. Will return teh original request
* if at all possible even if it doesn't make much
* sense.
* @return string Form is string "?a=1&b=2", etc.
* @access public
*/
function getEncodedRequest() {
if ($this->_raw) {
$encoded = $this->_raw;
} else {
$encoded = $this->_request->asUrlRequest();
}
if ($encoded) {
return '?' . preg_replace('/^\?/', '', $encoded);
}
return '';
}
/**
* Adds an additional parameter to the request.
* @param string $key Name of parameter.
* @param string $value Value as string.
* @access public
*/
function addRequestParameter($key, $value) {
$this->_raw = false;
$this->_request->add($key, $value);
}
/**
* Adds additional parameters to the request.
* @param hash/SimpleFormEncoding $parameters Additional
* parameters.
* @access public
*/
function addRequestParameters($parameters) {
$this->_raw = false;
$this->_request->merge($parameters);
}
/**
* Clears down all parameters.
* @access public
*/
function clearRequest() {
$this->_raw = false;
$this->_request = new SimpleGetEncoding();
}
/**
* Gets the frame target if present. Although
* not strictly part of the URL specification it
* acts as similarily to the browser.
* @return boolean/string Frame name or false if none.
* @access public
*/
function getTarget() {
return $this->_target;
}
/**
* Attaches a frame target.
* @param string $frame Name of frame.
* @access public
*/
function setTarget($frame) {
$this->_raw = false;
$this->_target = $frame;
}
/**
* Renders the URL back into a string.
* @return string URL in canonical form.
* @access public
*/
function asString() {
$path = $this->_path;
$scheme = $identity = $host = $port = $encoded = $fragment = '';
if ($this->_username && $this->_password) {
$identity = $this->_username . ':' . $this->_password . '@';
}
if ($this->getHost()) {
$scheme = $this->getScheme() ? $this->getScheme() : 'http';
$scheme .= "://";
$host = $this->getHost();
}
if ($this->getPort() && $this->getPort() != 80 ) {
$port = ':'.$this->getPort();
}
if (substr($this->_path, 0, 1) == '/') {
$path = $this->normalisePath($this->_path);
}
$encoded = $this->getEncodedRequest();
$fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
$coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
return "$scheme$identity$host$port$path$encoded$fragment$coords";
}
/**
* Replaces unknown sections to turn a relative
* URL into an absolute one. The base URL can
* be either a string or a SimpleUrl object.
* @param string/SimpleUrl $base Base URL.
* @access public
*/
function makeAbsolute($base) {
if (! is_object($base)) {
$base = new SimpleUrl($base);
}
if ($this->getHost()) {
$scheme = $this->getScheme();
$host = $this->getHost();
$port = $this->getPort() ? ':' . $this->getPort() : '';
$identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
if (! $identity) {
$identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
}
} else {
$scheme = $base->getScheme();
$host = $base->getHost();
$port = $base->getPort() ? ':' . $base->getPort() : '';
$identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
}
$path = $this->normalisePath($this->_extractAbsolutePath($base));
$encoded = $this->getEncodedRequest();
$fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
$coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
}
/**
* Replaces unknown sections of the path with base parts
* to return a complete absolute one.
* @param string/SimpleUrl $base Base URL.
* @param string Absolute path.
* @access private
*/
function _extractAbsolutePath($base) {
if ($this->getHost()) {
return $this->_path;
}
if (! $this->_isRelativePath($this->_path)) {
return $this->_path;
}
if ($this->_path) {
return $base->getBasePath() . $this->_path;
}
return $base->getPath();
}
/**
* Simple test to see if a path part is relative.
* @param string $path Path to test.
* @return boolean True if starts with a "/".
* @access private
*/
function _isRelativePath($path) {
return (substr($path, 0, 1) != '/');
}
/**
* Extracts the username and password for use in rendering
* a URL.
* @return string/boolean Form of username:password or false.
* @access public
*/
function getIdentity() {
if ($this->_username && $this->_password) {
return $this->_username . ':' . $this->_password;
}
return false;
}
/**
* Replaces . and .. sections of the path.
* @param string $path Unoptimised path.
* @return string Path with dots removed if possible.
* @access public
*/
function normalisePath($path) {
$path = preg_replace('|/\./|', '/', $path);
return preg_replace('|/[^/]+/\.\./|', '/', $path);
}
/**
* A pipe seperated list of all TLDs that result in two part
* domain names.
* @return string Pipe separated list.
* @access public
* @static
*/
function getAllTopLevelDomains() {
return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
}
}
?>