<?php

/**
 * This class handles the conversion of HTML fragments between a string and a DOMDocument based
 * representation.
 *
 * It's designed to allow dependency injection to replace the standard HTML4 version with one that
 * handles XHTML or HTML5 instead.
 *
 * @package framework
 * @subpackage integration
 */
abstract class SS_HTMLValue extends ViewableData {

	/**
	 * The wrapped document. Null until first requested through getDocument(),
	 * false once setInvalid() has been called.
	 *
	 * @var DOMDocument|null|false
	 */
	private $document = null;

	/**
	 * False once setInvalid() has been called; reset to true by setDocument().
	 *
	 * @var bool
	 */
	private $valid = true;

	/**
	 * @param string $fragment Optional initial content, handed to setContent().
	 *                         Falsy values (null, '', '0') are skipped.
	 */
	public function __construct($fragment = null) {
		if ($fragment) $this->setContent($fragment);
		parent::__construct();
	}

	/**
	 * Load the given fragment into the wrapped document.
	 *
	 * @param string $fragment
	 */
	abstract public function setContent($fragment);

	/**
	 * Serialise the contents of the document's body element back into a string.
	 *
	 * @return string The markup inside <body>, or an empty string when this instance is invalid
	 *                or the document has no body element
	 */
	public function getContent() {
		$document = $this->getDocument();

		// getDocument() returns false while invalid; cloning a non-object is a fatal error
		if (!$document) return '';

		// Work on a copy, because attribute values are temporarily overwritten below
		$doc = clone $document;
		$xp = new DOMXPath($doc);

		// If there's no body, the content is empty string
		if (!$doc->getElementsByTagName('body')->length) return '';

		// saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
		// shortcodes. So first, save all the attribute values for later restoration.
		$attrs = array();
		$i = 0;

		foreach ($xp->query('//body//@*') as $attr) {
			$key = "__HTMLVALUE_".($i++);
			$attrs[$key] = $attr->value;
			$attr->value = $key;
		}

		// Then, call saveHTML & extract out the content from the body tag
		$res = preg_replace(
			array(
				'/^(.*?)<body>/is',
				'/<\/body>(.*?)$/isD',
			),
			'',
			$doc->saveHTML()
		);

		// Then replace the saved attributes with their original versions
		$res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
			// Text that merely looks like a placeholder but was never stored is left untouched
			if (!isset($attrs[$matches[0]])) return $matches[0];

			return Convert::raw2att($attrs[$matches[0]]);
		}, $res);

		return $res;
	}

	/** @see SS_HTMLValue::getContent() */
	public function forTemplate() {
		return $this->getContent();
	}

	/**
	 * Get the DOMDocument for the passed content, creating an empty one on first use.
	 *
	 * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
	 */
	public function getDocument() {
		if (!$this->valid) {
			return false;
		}

		if (!$this->document) {
			$this->document = new DOMDocument('1.0', 'UTF-8');
			$this->document->strictErrorChecking = false;
			$this->document->formatOutput = false;
		}

		return $this->document;
	}

	/**
	 * Is this HTMLValue in a valid (non-errored) state?
	 *
	 * @return bool True unless setInvalid() has been called since the last setDocument()
	 */
	public function isValid() {
		return $this->valid;
	}

	/**
	 * Replace the wrapped document and mark this instance as valid again.
	 *
	 * @param DOMDocument $document Passing null makes getDocument() create a fresh document
	 */
	public function setDocument($document) {
		$this->document = $document;
		$this->valid = true;
	}

	/**
	 * Put this instance into the invalid state: the document is dropped and
	 * getDocument() returns false until setDocument() is called.
	 */
	public function setInvalid() {
		$this->valid = false;
		$this->document = false;
	}

	/**
	 * Pass through any missed method calls to DOMDocument (if they exist)
	 * so that HTMLValue can be treated mostly like an instance of DOMDocument
	 *
	 * @param string $method
	 * @param array $arguments
	 * @return mixed
	 */
	public function __call($method, $arguments) {
		$doc = $this->getDocument();

		// $doc is false while invalid; only probe it for methods when it is a real document
		if($doc && method_exists($doc, $method)) {
			return call_user_func_array(array($doc, $method), $arguments);
		}

		return parent::__call($method, $arguments);
	}

	/**
	 * Get the body element, or false if there isn't one (we haven't loaded any content
	 * or this instance is in an invalid state)
	 *
	 * @return DOMNode | false
	 */
	public function getBody() {
		$doc = $this->getDocument();
		if (!$doc) return false;

		$body = $doc->getElementsByTagName('body');
		if (!$body->length) return false;

		return $body->item(0);
	}

	/**
	 * Make an xpath query against this HTML
	 *
	 * @param $query string - The xpath query string
	 * @return DOMNodeList | false - false if this instance is in an invalid state
	 */
	public function query($query) {
		$doc = $this->getDocument();

		// DOMXPath cannot be constructed without a document
		if (!$doc) return false;

		$xp = new DOMXPath($doc);
		return $xp->query($query);
	}
}
class SS_HTML4Value extends SS_HTMLValue {

	/**
	 * Parse an HTML fragment into the wrapped DOMDocument.
	 *
	 * The fragment is placed inside a generated html/head/body skeleton whose meta tag
	 * declares the content as UTF-8.
	 *
	 * @param string $content
	 * @return bool The result of DOMDocument::loadHTML()
	 */
	public function setContent($content) {
		// Strip \r (carriage return) characters so DOMDocument doesn't replace them with the "&#13;" entity.
		// That replacement is apparently XML spec behaviour, but it messes up the HTML.
		$content = str_replace("\r", '', $content);

		// An instance left in an invalid state starts over with a fresh document
		if (!$this->isValid()) {
			$this->setDocument(null);
		}

		$markup = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>'
			. "<body>$content</body></html>";

		// Switch libxml to internal error handling while parsing, then clear the
		// collected errors and restore whatever setting was active before
		$previousErrorState = libxml_use_internal_errors(true);
		$loaded = $this->getDocument()->loadHTML($markup);
		libxml_clear_errors();
		libxml_use_internal_errors($previousErrorState);

		return $loaded;
	}
}