2007-07-19 10:40:28 +00:00
|
|
|
<?php
|
|
|
|
/**
 * Library of conversion functions, implemented as static methods.
 *
 * The methods are all of the form (format)2(format), where the format is one of
 *
 *  raw: A UTF8 string
 *  attr: A UTF8 string suitable for inclusion in an HTML attribute
 *  js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
 *
 *  array: A PHP associative array
 *  json: JavaScript object notation
 *
 *  html: HTML source suitable for use in a page or email
 *  text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
 *
 * Objects of type {@link ViewableData} can have an "escaping type",
 * which determines if they are automatically escaped before output by {@link SSViewer}.
 *
 * @package framework
 * @subpackage misc
 */
class Convert {

	/**
	 * Convert a value to be suitable for an XML attribute.
	 *
	 * Delegates to {@link Convert::raw2xml()}.
	 *
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2att($val) {
		return self::raw2xml($val);
	}

	/**
	 * Convert a value to be suitable for an HTML attribute.
	 *
	 * Delegates to {@link Convert::raw2att()}.
	 *
	 * @param string|array $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2htmlatt($val) {
		return self::raw2att($val);
	}

	/**
	 * Convert a value to be suitable for an HTML ID or NAME attribute.
	 *
	 * This is useful for converting human readable values into an identifier:
	 * every character other than a-z, A-Z, 0-9, "-", "_", ":" and "." is
	 * removed (not replaced). Arrays are processed recursively, keys are kept.
	 *
	 * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
	 * @param array|string $val String to convert, or array of strings
	 * @return array|string
	 */
	public static function raw2htmlname($val) {
		if(is_array($val)) {
			foreach($val as $k => $v) {
				$val[$k] = self::raw2htmlname($v);
			}
			return $val;
		}

		return preg_replace('/[^a-zA-Z0-9\-_:.]+/', '', $val);
	}

	/**
	 * Ensure that text is properly escaped for XML.
	 *
	 * Escapes &, <, > and both quote styles (ENT_QUOTES), treating the input
	 * as UTF-8. Arrays are processed recursively, keys are kept.
	 *
	 * @see http://www.w3.org/TR/REC-xml/#dt-escape
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2xml($val) {
		if(is_array($val)) {
			foreach($val as $k => $v) {
				$val[$k] = self::raw2xml($v);
			}
			return $val;
		}

		return htmlspecialchars($val, ENT_QUOTES, 'UTF-8');
	}

	/**
	 * Ensure that text is properly escaped for Javascript.
	 *
	 * Backslashes, both quote styles, CR and LF are backslash-escaped, and
	 * <, > and & are emitted as \x3c, \x3e and \x26.
	 * Arrays are processed recursively, keys are kept.
	 *
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2js($val) {
		if(is_array($val)) {
			foreach($val as $k => $v) {
				$val[$k] = self::raw2js($v);
			}
			return $val;
		}

		// str_replace() applies the pairs in order, so the backslash must stay
		// first: otherwise the backslashes inserted by later pairs would be
		// escaped a second time.
		return str_replace(
			// Intercepts some characters such as <, >, and & which can interfere
			array("\\", '"', "\n", "\r", "'", "<", ">", "&"),
			array("\\\\", '\"', '\n', '\r', "\\'", "\\x3c", "\\x3e", "\\x26"),
			$val
		);
	}

	/**
	 * Encode a value as a JSON encoded string.
	 *
	 * @param mixed $val Value to be encoded
	 * @return string JSON encoded string
	 */
	public static function raw2json($val) {
		return json_encode($val);
	}

	/**
	 * Encode an array as a JSON encoded string.
	 * This is an alias to {@link raw2json()}
	 *
	 * @param array $val Array to convert
	 * @return string JSON encoded string
	 */
	public static function array2json($val) {
		return self::raw2json($val);
	}

	/**
	 * Escape a value for use in an SQL statement.
	 *
	 * Each string is passed through addslashes() on the connection returned
	 * by DB::getConn(). Arrays are processed recursively, keys are kept.
	 * NOTE(review): whether the result still needs surrounding quotes depends
	 * on the connection's addslashes() implementation - confirm at call site.
	 *
	 * @param array|string $val String to escape, or array of strings
	 * @return array|string
	 */
	public static function raw2sql($val) {
		if(is_array($val)) {
			foreach($val as $k => $v) {
				$val[$k] = self::raw2sql($v);
			}
			return $val;
		}

		return DB::getConn()->addslashes($val);
	}

	/**
	 * Convert XML to raw text.
	 *
	 * Values containing a "<" are handed to {@link html2raw()}; anything else
	 * only has its entities decoded. Arrays are processed recursively.
	 *
	 * @uses html2raw()
	 * @todo Currently &#xxx; entries are stripped; they should be converted
	 * @param array|string $val XML string, or array of XML strings
	 * @return array|string
	 */
	public static function xml2raw($val) {
		if(is_array($val)) {
			foreach($val as $k => $v) {
				$val[$k] = self::xml2raw($v);
			}
			return $val;
		}

		// More complex text needs to use html2raw instead
		if(strpos($val, '<') !== false) {
			return self::html2raw($val);
		}

		return html_entity_decode($val, ENT_QUOTES, 'UTF-8');
	}

	/**
	 * Convert a JSON encoded string into an object.
	 *
	 * @param string $val
	 * @return mixed The json_decode() result (JSON objects become stdClass
	 *               instances); NULL if the string cannot be decoded
	 */
	public static function json2obj($val) {
		return json_decode($val);
	}

	/**
	 * Convert a JSON string into an array.
	 *
	 * @param string $val JSON string to convert
	 * @return mixed The json_decode() result with JSON objects as associative
	 *               arrays; NULL if the string cannot be decoded
	 */
	public static function json2array($val) {
		return json_decode($val, true);
	}

	/**
	 * Converts an XML string to a PHP array
	 * See http://phpsecurity.readthedocs.org/en/latest/Injection-Attacks.html#xml-external-entity-injection
	 *
	 * @uses recursiveXMLToArray()
	 * @param string $val
	 * @param boolean $disableDoctypes Disables the use of DOCTYPE, and will trigger an error if encountered.
	 * false by default.
	 * @param boolean $disableExternals Disables the loading of external entities. false by default.
	 * @return array
	 * @throws InvalidArgumentException If $disableDoctypes is set and the input contains a DOCTYPE
	 * @throws Exception Whatever SimpleXMLElement raises for unparsable input
	 */
	public static function xml2array($val, $disableDoctypes = false, $disableExternals = false) {
		// Check doctype. A plain substring test is used on purpose: a pattern
		// that requires the whole declaration on one line, or requires an
		// internal subset, lets multi-line and SYSTEM/PUBLIC doctypes through.
		if($disableDoctypes && strpos($val, '<!DOCTYPE') !== false) {
			throw new InvalidArgumentException('XML Doctype parsing disabled');
		}

		// Disable external entity loading.
		// libxml_disable_entity_loader() is deprecated as of PHP 8.0, which
		// requires libxml >= 2.9.0 where external entities are not loaded by
		// default, so the toggle is only needed (and only used) before PHP 8.
		$toggleLoader = $disableExternals && PHP_VERSION_ID < 80000;
		$oldVal = null;
		if($toggleLoader) $oldVal = libxml_disable_entity_loader(true);

		try {
			$xml = new SimpleXMLElement($val);
			$result = self::recursiveXMLToArray($xml);
		} catch(Exception $ex) {
			// Restore the previous loader state before propagating the failure
			if($toggleLoader) libxml_disable_entity_loader($oldVal);
			throw $ex;
		}

		if($toggleLoader) libxml_disable_entity_loader($oldVal);
		return $result;
	}

	/**
	 * Convert a XML string to a PHP array recursively. Do not
	 * call this function directly, Please use {@link Convert::xml2array()}
	 *
	 * Attributes collected from an element are stored under the "@" key of
	 * the array built for that element.
	 *
	 * @param SimpleXMLElement|array|string $xml Node, list of nodes, or leaf value
	 * @return mixed Nested array for elements with content, string for leaves
	 */
	protected static function recursiveXMLToArray($xml) {
		$attributes = array();
		$element = null;

		if($xml instanceof SimpleXMLElement) {
			foreach($xml->attributes() as $k => $v) {
				if($v) $attributes[$k] = (string) $v;
			}
			// Keep the node itself: it is needed below when it has no properties
			$element = $xml;
			$xml = get_object_vars($xml);
		}

		if(is_array($xml)) {
			if(count($xml) == 0) return (string) $element; // for CDATA

			$result = array();
			foreach($xml as $key => $value) {
				$result[$key] = self::recursiveXMLToArray($value);
			}
			if($attributes) $result['@'] = $attributes; // Attributes
			return $result;
		}

		return (string) $xml;
	}

	/**
	 * Create a link if the string is a valid URL
	 *
	 * The URL is HTML-escaped before being placed in the href attribute and
	 * the link text; entities already present in the input are not encoded a
	 * second time. Strings that do not look like a URL are returned unchanged.
	 *
	 * @param string $string The string to linkify
	 * @return string A link to the URL if string is a URL
	 */
	public static function linkIfMatch($string) {
		if(!preg_match('/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', $string)) {
			return $string;
		}

		// The "$-_" range in the pattern above also admits characters such as
		// <, > and ', so the match must be escaped before it is used as markup.
		$safe = htmlspecialchars($string, ENT_QUOTES, 'UTF-8', false);
		return "<a style=\"white-space: nowrap\" href=\"$safe\">$safe</a>";
	}

	/**
	 * Simple conversion of HTML to plaintext.
	 *
	 * Note that entities are decoded before the remaining tags are removed,
	 * so escaped markup such as "&lt;b&gt;" is decoded and then stripped too.
	 *
	 * @param string $data HTML source
	 * @param boolean $preserveLinks Keep <a> tags instead of expanding them to "text[url]"
	 * @param int $wordWrap Column to wrap the result at (default 60); a value
	 *                      of 0 or false disables wrapping
	 * @param array $config Optional overrides for the options 'PreserveLinks',
	 *                      'ReplaceBoldAsterisk', 'CompressWhitespace' and
	 *                      'ReplaceImagesWithAlt'
	 * @return string
	 */
	public static function html2raw($data, $preserveLinks = false, $wordWrap = 60, $config = null) {
		$defaultConfig = array(
			'PreserveLinks' => false,
			'ReplaceBoldAsterisk' => true,
			'CompressWhitespace' => true,
			'ReplaceImagesWithAlt' => true,
		);
		if(isset($config)) {
			$config = array_merge($defaultConfig, $config);
		} else {
			$config = $defaultConfig;
		}

		// Links are kept when either the argument or the config option asks for it
		$keepLinks = $preserveLinks || $config['PreserveLinks'];

		// Drop style and script elements together with their content
		$data = preg_replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is","", $data);
		$data = preg_replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is","", $data);

		if($config['ReplaceBoldAsterisk']) {
			$data = preg_replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i','*',$data);
		}

		// Expand hyperlinks
		if(!$keepLinks) {
			$expandLink = function($matches) {
				return Convert::html2raw($matches[2]) . "[$matches[1]]";
			};
			// Quoted href values first, then unquoted ones
			$data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/i', $expandLink, $data);
			$data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/i', $expandLink, $data);
		}

		// Replace images with their alt tags
		if($config['ReplaceImagesWithAlt']) {
			$data = preg_replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
			$data = preg_replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
		}

		// Compress whitespace
		if($config['CompressWhitespace']) {
			$data = preg_replace("/\s+/", " ", $data);
		}

		// Parse newline tags
		$data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
		$data = preg_replace("/\n\n\n+/", "\n\n", $data);

		$data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
		$data = preg_replace("/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
		$data = preg_replace("/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */", " ", $data);
		$data = preg_replace('/<\/p>/i', "\n\n", $data );

		// Replace HTML entities
		$data = html_entity_decode($data, ENT_COMPAT , 'UTF-8');

		// Remove all tags (but optionally keep links)

		// strip_tags seemed to be restricting the length of the output
		// arbitrarily. This essentially does the same thing.
		if(!$keepLinks) {
			$data = preg_replace('/<\/?[^>]*>/','', $data);
		} else {
			$data = strip_tags($data, '<a>');
		}

		$data = trim($data);

		// wordwrap() with a width of 0 would turn every space into a line
		// break, so a falsy width means "do not wrap" instead.
		if($wordWrap) {
			$data = trim(wordwrap($data, $wordWrap));
		}

		return $data;
	}

	/**
	 * There are no real specifications on correctly encoding mailto-links,
	 * but this seems to be compatible with most of the user-agents.
	 * Does nearly the same as rawurlencode().
	 * Please only encode the values, not the whole url, e.g.
	 * "mailto:test@test.com?subject=" . Convert::raw2mailto($subject)
	 *
	 * @param string $data
	 * @return string
	 * @see http://www.ietf.org/rfc/rfc1738.txt
	 */
	public static function raw2mailto($data) {
		// None of the search strings contains a letter, so a case-sensitive
		// replace gives the same result as a case-insensitive one.
		return str_replace(
			array("\n",'?','=',' ','(',')','&','@','"','\'',';'),
			array('%0A','%3F','%3D','%20','%28','%29','%26','%40','%22','%27','%3B'),
			$data
		);
	}

	/**
	 * Convert a string (normally a title) to a string suitable for using in
	 * urls and other html attributes. Uses {@link URLSegmentFilter}.
	 *
	 * @param string $title
	 * @return string
	 */
	public static function raw2url($title) {
		$f = URLSegmentFilter::create();
		return $f->filter($title);
	}

	/**
	 * Normalises newline sequences to conform to (an) OS specific format.
	 *
	 * @param string $data Text containing potentially mixed formats of newline
	 * sequences including \r, \r\n, \n, or unicode newline characters
	 * @param string $nl The newline sequence to normalise to. Defaults to that
	 * specified by the current OS
	 * @return string The text with every newline sequence replaced by $nl
	 */
	public static function nl2os($data, $nl = PHP_EOL) {
		return preg_replace('~\R~u', $nl, $data);
	}

}
|