<?php
/**
 * Library of conversion functions, implemented as static methods.
 *
 * The methods are all of the form (format)2(format), where the format is one of
 *
 *  raw: A UTF8 string
 *  attr: A UTF8 string suitable for inclusion in an HTML attribute
 *  js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
 *
 *  array: A PHP associative array
 *  json: JavaScript object notation
 *
 *  html: HTML source suitable for use in a page or email
 *  text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
 *
 * Objects of type {@link ViewableData} can have an "escaping type",
 * which determines if they are automatically escaped before output by {@link SSViewer}.
 *
 * @package framework
 * @subpackage misc
 */
class Convert {

	/**
	 * Convert a value to be suitable for an XML attribute.
	 * Delegates to {@link raw2xml()}.
	 *
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2att($val) {
		return self::raw2xml($val);
	}

	/**
	 * Convert a value to be suitable for an HTML attribute.
	 * This is an alias of {@link raw2att()}.
	 *
	 * @param string|array $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2htmlatt($val) {
		return self::raw2att($val);
	}

	/**
	 * Convert a value to be suitable for an HTML "name" attribute.
	 *
	 * No characters are removed or replaced: the value is only escaped, in the
	 * same way as {@link raw2att()}. Arrays are processed recursively and keep
	 * their keys.
	 *
	 * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
	 *
	 * @param array|string $val String to escape, or array of strings
	 *
	 * @return array|string
	 */
	public static function raw2htmlname($val) {
		if(!is_array($val)) {
			return self::raw2att($val);
		}

		foreach($val as $key => $item) {
			$val[$key] = self::raw2htmlname($item);
		}

		return $val;
	}

	/**
	 * Convert a value to be suitable for an HTML ID attribute. Every run of
	 * characters other than letters, digits, "-", "_", ":" and "." is replaced
	 * with a single underscore, consecutive underscores are collapsed, and
	 * leading/trailing underscores are trimmed.
	 *
	 * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
	 *
	 * @param array|string $val String to escape, or array of strings
	 *
	 * @return array|string
	 */
	public static function raw2htmlid($val) {
		if(!is_array($val)) {
			$replaced = preg_replace('/[^a-zA-Z0-9\-_:.]+/', '_', $val);
			$collapsed = preg_replace('/_+/', '_', $replaced);

			return trim($collapsed, '_');
		}

		foreach($val as $key => $item) {
			$val[$key] = self::raw2htmlid($item);
		}

		return $val;
	}

	/**
	 * Ensure that text is properly escaped for XML.
	 * Both single and double quotes are encoded (ENT_QUOTES).
	 *
	 * @see http://www.w3.org/TR/REC-xml/#dt-escape
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2xml($val) {
		if(!is_array($val)) {
			return htmlspecialchars($val, ENT_QUOTES, 'UTF-8');
		}

		foreach($val as $key => $item) {
			$val[$key] = self::raw2xml($item);
		}

		return $val;
	}

	/**
	 * Ensure that text is properly escaped for Javascript.
	 *
	 * Backslashes, quotes (single and double), newlines and carriage returns
	 * are backslash-escaped; "<", ">" and "&" are emitted as \x hex escapes.
	 *
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2js($val) {
		if(!is_array($val)) {
			// The backslash has to be first: str_replace() applies the pairs in
			// order, and the later replacements introduce backslashes themselves.
			$search = array("\\", '"', "\n", "\r", "'", "<", ">", "&");
			$replace = array("\\\\", '\"', '\n', '\r', "\\'", "\\x3c", "\\x3e", "\\x26");

			return str_replace($search, $replace, $val);
		}

		foreach($val as $key => $item) {
			$val[$key] = self::raw2js($item);
		}

		return $val;
	}

	/**
	 * Encode a value as a JSON encoded string.
	 *
	 * @param mixed $val Value to be encoded
	 * @return string JSON encoded string
	 */
	public static function raw2json($val) {
		return json_encode($val);
	}

	/**
	 * Encode an array as a JSON encoded string.
	 * This is an alias to {@link raw2json()}
	 *
	 * @param array $val Array to convert
	 * @return string JSON encoded string
	 */
	public static function array2json($val) {
		return self::raw2json($val);
	}

	/**
	 * Safely encodes a value (or list of values) using the current database's
	 * safe string encoding method
	 *
	 * @param mixed|array $val Input value, or list of values as an array
	 * @param boolean $quoted Flag indicating whether the value should be safely
	 * quoted, instead of only being escaped. By default this function will
	 * only escape the string (false).
	 * @return string|array Safely encoded value in the same format as the input
	 */
	public static function raw2sql($val, $quoted = false) {
		if(is_array($val)) {
			foreach($val as $key => $item) {
				$val[$key] = self::raw2sql($item, $quoted);
			}

			return $val;
		}

		if($quoted) {
			return DB::get_conn()->quoteString($val);
		}

		return DB::get_conn()->escapeString($val);
	}

	/**
	 * Safely encodes a SQL symbolic identifier (or list of identifiers), such as a database,
	 * table, or column name. Supports encoding of multi identifiers separated by
	 * a delimiter (e.g. ".")
	 *
	 * @param string|array $identifier The identifier to escape. E.g. 'SiteTree.Title'
	 * @param string $separator The string that delimits subsequent identifiers
	 * @return string|array The escaped identifier. E.g. '"SiteTree"."Title"'
	 */
	public static function symbol2sql($identifier, $separator = '.') {
		if(!is_array($identifier)) {
			return DB::get_conn()->escapeIdentifier($identifier, $separator);
		}

		foreach($identifier as $key => $item) {
			$identifier[$key] = self::symbol2sql($item, $separator);
		}

		return $identifier;
	}

	/**
	 * Convert XML to raw text.
	 *
	 * Values containing a "<" are handed to {@link html2raw()}; anything else
	 * only has its entities decoded.
	 *
	 * @uses html2raw()
	 * @todo Currently &#xxx; entries are stripped; they should be converted
	 *
	 * @param array|string $val XML string, or array of XML strings
	 * @return array|string
	 */
	public static function xml2raw($val) {
		if(is_array($val)) {
			foreach($val as $key => $item) {
				$val[$key] = self::xml2raw($item);
			}

			return $val;
		}

		// More complex text needs to use html2raw instead
		if(strpos($val, '<') !== false) {
			return self::html2raw($val);
		}

		return html_entity_decode($val, ENT_QUOTES, 'UTF-8');
	}

	/**
	 * Convert a JSON encoded string into an object.
	 *
	 * @param string $val
	 * @return mixed The decoded value as returned by json_decode(); JSON objects
	 * become PHP objects. json_decode() yields null when the input cannot be decoded.
	 */
	public static function json2obj($val) {
		return json_decode($val);
	}

	/**
	 * Convert a JSON string into an array.
	 *
	 * @param string $val JSON string to convert
	 * @return mixed The decoded value as returned by json_decode(); JSON objects
	 * become associative arrays. json_decode() yields null when the input cannot be decoded.
	 */
	public static function json2array($val) {
		return json_decode($val, true);
	}

	/**
	 * Converts an XML string to a PHP array
	 *
	 * NOTE(review): $val is handed to the XML parser as-is. If callers can pass
	 * untrusted XML, DOCTYPE / entity handling should be reviewed at the call site.
	 *
	 * @uses recursiveXMLToArray()
	 * @param string $val XML source
	 *
	 * @return array
	 */
	public static function xml2array($val) {
		$xml = new SimpleXMLElement($val);
		return self::recursiveXMLToArray($xml);
	}

	/**
	 * Convert a XML string to a PHP array recursively. Do not
	 * call this function directly, Please use {@link Convert::xml2array()}
	 *
	 * Attributes of an element are collected under the '@' key of its array.
	 *
	 * @param SimpleXMLElement|array|string $xml Element, list of children, or leaf value
	 *
	 * @return mixed
	 */
	protected static function recursiveXMLToArray($xml) {
		$attributes = array();
		$element = null;

		if(is_object($xml) && get_class($xml) === 'SimpleXMLElement') {
			foreach($xml->attributes() as $name => $value) {
				if($value) $attributes[$name] = (string) $value;
			}
			// Keep the element itself: its string value is needed if it has no children
			$element = $xml;
			$xml = get_object_vars($xml);
		}

		if(!is_array($xml)) {
			return (string) $xml;
		}

		if(count($xml) == 0) {
			return (string) $element; // for CDATA
		}

		$result = array();
		foreach($xml as $key => $value) {
			$result[$key] = self::recursiveXMLToArray($value);
		}
		if($attributes) $result['@'] = $attributes; // Attributes

		return $result;
	}

	/**
	 * Create a link if the string is a valid URL
	 *
	 * The matched string is HTML-escaped before being placed in the markup:
	 * the "$-_" sequence in the validation pattern is a character range and
	 * therefore also admits characters such as "<", ">" and "'". Entities that
	 * are already encoded (e.g. "&amp;") are not encoded a second time.
	 *
	 * @param string $string The string to linkify
	 * @return string A link to the URL if the string is a URL, otherwise the
	 * string itself, unchanged
	 */
	public static function linkIfMatch($string) {
		if(!preg_match('/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', $string)) {
			return $string;
		}

		$escaped = htmlspecialchars($string, ENT_QUOTES, 'UTF-8', false);

		return "<a style=\"white-space: nowrap\" href=\"$escaped\">$escaped</a>";
	}

	/**
	 * Simple conversion of HTML to plaintext.
	 *
	 * @param string $data HTML source
	 * @param boolean $preserveLinks If true, <a> tags are kept in the output
	 * instead of being expanded to "text[url]"
	 * @param int $wordWrap Column at which the result is word-wrapped
	 * @param array $config Optional overrides for the boolean options
	 * 'PreserveLinks', 'ReplaceBoldAsterisk', 'CompressWhitespace' and
	 * 'ReplaceImagesWithAlt'
	 * @return string
	 */
	public static function html2raw($data, $preserveLinks = false, $wordWrap = 60, $config = null) {
		$defaultConfig = array(
			'PreserveLinks' => false,
			'ReplaceBoldAsterisk' => true,
			'CompressWhitespace' => true,
			'ReplaceImagesWithAlt' => true,
		);
		$config = isset($config) ? array_merge($defaultConfig, $config) : $defaultConfig;

		// Links are kept when either the argument or the config option asks for it
		$keepLinks = $preserveLinks || $config['PreserveLinks'];

		// Drop <style> and <script> elements together with their content
		$data = preg_replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is","", $data);
		$data = preg_replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is","", $data);

		if($config['ReplaceBoldAsterisk']) {
			$data = preg_replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i','*',$data);
		}

		// Expand hyperlinks to "link text[url]"; quoted hrefs first, then unquoted.
		// The link text is converted with the default html2raw() arguments.
		if(!$keepLinks) {
			$expandLink = function($matches) {
				return Convert::html2raw($matches[2]) . "[$matches[1]]";
			};
			$data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/i', $expandLink, $data);
			$data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/i', $expandLink, $data);
		}

		// Replace images with their alt tags
		if($config['ReplaceImagesWithAlt']) {
			$data = preg_replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
			$data = preg_replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
		}

		// Compress whitespace
		if($config['CompressWhitespace']) {
			$data = preg_replace("/\s+/", " ", $data);
		}

		// Parse newline tags: headings, paragraphs and divs start a new paragraph
		$data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\n\n\n+/", "\n\n", $data);

		// Line breaks and table rows start a new line; cells are separated by spaces
		$data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
		$data = preg_replace("/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
		$data = preg_replace("/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */", "    ", $data);
		$data = preg_replace('/<\/p>/i', "\n\n", $data );

		// Replace HTML entities
		$data = html_entity_decode($data, ENT_COMPAT , 'UTF-8');

		// Remove all tags (but optionally keep links)
		if($keepLinks) {
			$data = strip_tags($data, '<a>');
		} else {
			// strip_tags seemed to be restricting the length of the output
			// arbitrarily. This essentially does the same thing.
			$data = preg_replace('/<\/?[^>]*>/','', $data);
		}

		return trim(wordwrap(trim($data), $wordWrap));
	}

	/**
	 * There are no real specifications on correctly encoding mailto-links,
	 * but this seems to be compatible with most of the user-agents.
	 * Does nearly the same as rawurlencode().
	 * Please only encode the values, not the whole url, e.g.
	 * "mailto:test@test.com?subject=" . Convert::raw2mailto($subject)
	 *
	 * @param string $data
	 * @return string
	 * @see http://www.ietf.org/rfc/rfc1738.txt
	 */
	public static function raw2mailto($data) {
		// None of the searched characters is a letter, so a case-sensitive
		// replacement is sufficient.
		return str_replace(
			array("\n",'?','=',' ','(',')','&','@','"','\'',';'),
			array('%0A','%3F','%3D','%20','%28','%29','%26','%40','%22','%27','%3B'),
			$data
		);
	}

	/**
	 * Convert a string (normally a title) to a string suitable for using in
	 * urls and other html attributes. Uses {@link URLSegmentFilter}.
	 *
	 * @param string $title
	 * @return string
	 */
	public static function raw2url($title) {
		return URLSegmentFilter::create()->filter($title);
	}

	/**
	 * Normalises newline sequences to conform to (an) OS specific format.
	 *
	 * @param string $data Text containing potentially mixed formats of newline
	 * sequences including \r, \r\n, \n, or unicode newline characters
	 * @param string $nl The newline sequence to normalise to. Defaults to that
	 * specified by the current OS
	 * @return string Text with every newline sequence replaced by $nl. If $data
	 * is not valid UTF-8, only \r\n, \r and \n are normalised.
	 */
	public static function nl2os($data, $nl = PHP_EOL) {
		$normalised = preg_replace('~\R~u', $nl, $data);

		if($normalised === null) {
			// With the "u" modifier preg_replace() fails (returns null) on input
			// that is not valid UTF-8. Fall back to a byte-safe pattern rather
			// than silently discarding the caller's data.
			$normalised = preg_replace('~\r\n?|\n~', $nl, $data);
		}

		return $normalised;
	}
}