silverstripe-framework/View/Parsers/HTMLValue.php
2016-09-09 18:43:05 +12:00

164 lines
4.0 KiB
PHP

<?php
namespace SilverStripe\View\Parsers;
use SilverStripe\Core\Convert;
use SilverStripe\View\ViewableData;
use DOMNodeList;
use DOMXPath;
use DOMDocument;
/**
* This class handles the converting of HTML fragments between a string and a DOMDocument based
* representation.
*
* It's designed to allow dependency injection to replace the standard HTML4 version with one that
* handles XHTML or HTML5 instead
*/
abstract class HTMLValue extends ViewableData {

    /**
     * Token prefix temporarily written into attribute values while serialising,
     * see {@link HTMLValue::getContent()}.
     */
    const ATTRIBUTE_PLACEHOLDER = '__HTMLVALUE_';

    /** @var DOMDocument|false|null The wrapped document; null until first requested, false once invalidated */
    private $document = null;

    /** @var bool False after setInvalid() has been called and no new document has been set */
    private $valid = true;

    /**
     * @param string|null $fragment Optional initial content, handed to setContent()
     */
    public function __construct($fragment = null) {
        // '0' is falsy in PHP but is still real content, so accept it explicitly.
        if ($fragment || $fragment === '0') {
            $this->setContent($fragment);
        }
        parent::__construct();
    }

    /**
     * Load the given fragment into this value. Implemented by subclasses.
     * NOTE(review): presumably implementations end by calling setDocument() or
     * setInvalid() - confirm against the concrete subclasses.
     *
     * @param string $fragment
     */
    abstract public function setContent($fragment);

    /**
     * Serialise the inner HTML of the document's body element back to a string.
     *
     * @return string The body's inner HTML, or an empty string when this instance is
     *                invalid or the document has no body element
     */
    public function getContent() {
        $source = $this->getDocument();

        // getDocument() returns false in the invalid state; cloning a non-object is fatal.
        if (!$source) {
            return '';
        }

        // Work on a copy, since the attribute values are overwritten below.
        $doc = clone $source;

        // If there's no body, the content is empty string
        if (!$doc->getElementsByTagName('body')->length) {
            return '';
        }

        // saveHTML percentage-encodes any URI-based attributes. We don't want this, since it
        // interferes with shortcodes. So first swap every attribute value for a placeholder
        // token and remember the original for later restoration.
        $xp = new DOMXPath($doc);
        $attrs = array();
        $i = 0;
        foreach ($xp->query('//body//@*') as $attr) {
            $key = self::ATTRIBUTE_PLACEHOLDER . ($i++);
            $attrs[$key] = $attr->value;
            $attr->value = $key;
        }

        // Then, call saveHTML & strip everything up to and including <body>,
        // and everything from </body> onwards.
        $res = preg_replace(
            array(
                '/^(.*?)<body>/is',
                '/<\/body>(.*?)$/isD',
            ),
            '',
            $doc->saveHTML()
        );

        // Then replace the placeholders with the (attribute-escaped) original values.
        $res = preg_replace_callback(
            '/' . self::ATTRIBUTE_PLACEHOLDER . '(\d+)/',
            function($matches) use ($attrs) {
                $token = $matches[0];
                // Text that only looks like a placeholder was never recorded; leave it alone
                // rather than replacing it with an empty string.
                if (!isset($attrs[$token])) {
                    return $token;
                }
                return Convert::raw2att($attrs[$token]);
            },
            $res
        );

        // Prevent &nbsp; being emitted as a literal UTF-8 character.
        // "\xC2\xA0" is the UTF-8 encoding of U+00A0 (no-break space).
        return str_replace("\xC2\xA0", '&nbsp;', $res);
    }

    /** @see HTMLValue::getContent() */
    public function forTemplate() {
        return $this->getContent();
    }

    /**
     * Get the DOMDocument for the passed content. An empty document is created on
     * first access if none has been set.
     *
     * @return DOMDocument|false Return false if HTML not valid, the DOMDocument instance otherwise
     */
    public function getDocument() {
        if (!$this->valid) {
            return false;
        }

        if (!$this->document) {
            $doc = new DOMDocument('1.0', 'UTF-8');
            $doc->strictErrorChecking = false;
            $doc->formatOutput = false;
            $this->document = $doc;
        }

        return $this->document;
    }

    /**
     * Is this HTMLValue in a usable (non-errored) state?
     *
     * @return bool True unless setInvalid() was called after the last setDocument()
     */
    public function isValid() {
        return $this->valid;
    }

    /**
     * Replace the wrapped document and mark this instance as valid again.
     *
     * @param DOMDocument $document
     */
    public function setDocument($document) {
        $this->document = $document;
        $this->valid = true;
    }

    /**
     * Discard the wrapped document and flag this instance as invalid.
     */
    public function setInvalid() {
        $this->valid = false;
        $this->document = false;
    }

    /**
     * Pass through any missed method calls to DOMDocument (if they exist)
     * so that HTMLValue can be treated mostly like an instance of DOMDocument
     *
     * @param string $method
     * @param array $arguments
     * @return mixed
     */
    public function __call($method, $arguments) {
        $doc = $this->getDocument();

        // In the invalid state there is no document to delegate to; method_exists()
        // must not be handed a boolean, so check the document first.
        if ($doc && method_exists($doc, $method)) {
            return call_user_func_array(array($doc, $method), $arguments);
        }

        return parent::__call($method, $arguments);
    }

    /**
     * Get the body element, or false if there isn't one (we haven't loaded any content
     * or this instance is in an invalid state)
     *
     * @return \DOMNode|false
     */
    public function getBody() {
        $doc = $this->getDocument();
        if (!$doc) {
            return false;
        }

        $bodies = $doc->getElementsByTagName('body');
        return $bodies->length ? $bodies->item(0) : false;
    }

    /**
     * Make an xpath query against this HTML
     *
     * @param string $query The xpath query string
     * @return DOMNodeList|false The matching nodes, or false when this instance is invalid
     *                           (DOMXPath::query() itself also yields false for a malformed expression)
     */
    public function query($query) {
        $doc = $this->getDocument();
        if (!$doc) {
            return false;
        }

        $xp = new DOMXPath($doc);
        return $xp->query($query);
    }
}