2009-10-11 02:07:27 +02:00
|
|
|
<?php
|
2013-03-11 21:06:20 +01:00
|
|
|
|
2016-08-19 00:51:35 +02:00
|
|
|
namespace SilverStripe\View\Parsers;
|
|
|
|
|
|
|
|
use SilverStripe\Core\Convert;
|
|
|
|
use SilverStripe\View\ViewableData;
|
|
|
|
use DOMNodeList;
|
|
|
|
use DOMXPath;
|
|
|
|
use DOMDocument;
|
|
|
|
|
2009-10-11 02:07:27 +02:00
|
|
|
/**
 * This class handles the converting of HTML fragments between a string and a DOMDocument based
 * representation.
 *
 * It's designed to allow dependency injection to replace the standard HTML4 version with one that
 * handles XHTML or HTML5 instead.
 */
abstract class SS_HTMLValue extends ViewableData {

    /**
     * The wrapped document. Null until a document is first requested or assigned, and reset to
     * null when the value is flagged as invalid.
     *
     * @var DOMDocument|null
     */
    private $document = null;

    /**
     * False once {@link setInvalid()} has been called; reset to true by {@link setDocument()}.
     *
     * @var bool
     */
    private $valid = true;

    /**
     * @param string|null $fragment Optional fragment to load immediately. Falsy values are ignored.
     */
    public function __construct($fragment = null) {
        if ($fragment) {
            $this->setContent($fragment);
        }

        parent::__construct();
    }

    /**
     * Load the given fragment into this value. How the fragment is parsed is up to the
     * concrete subclass.
     *
     * @param string $fragment
     */
    abstract public function setContent($fragment);

    /**
     * Serialise the children of the document's body element back into a string.
     *
     * @return string Empty string if this value is invalid or the document has no body
     */
    public function getContent() {
        $document = $this->getDocument();

        // An invalid value has no document to clone, so there is nothing to render
        if (!$document) {
            return '';
        }

        // Work on a copy: the attribute values are temporarily overwritten below
        $doc = clone $document;

        // If there's no body, the content is empty string
        if (!$doc->getElementsByTagName('body')->length) {
            return '';
        }

        // saveHTML percentage-encodes any URI-based attributes. We don't want this, since it
        // interferes with shortcodes. So first, save all the attribute values for later restoration.
        $xp = new DOMXPath($doc);
        $attrs = array();
        $i = 0;

        foreach ($xp->query('//body//@*') as $attr) {
            $key = '__HTMLVALUE_' . ($i++);
            $attrs[$key] = $attr->value;
            $attr->value = $key;
        }

        // Then, call saveHTML & extract out the content from the body tag
        $res = preg_replace(
            array(
                '/^(.*?)<body>/is',
                '/<\/body>(.*?)$/isD',
            ),
            '',
            $doc->saveHTML()
        );

        // Then replace the saved attributes with their original versions. Text that merely looks
        // like a placeholder (no saved attribute behind it) is left exactly as it was.
        $res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function ($matches) use ($attrs) {
            if (!isset($attrs[$matches[0]])) {
                return $matches[0];
            }

            return Convert::raw2att($attrs[$matches[0]]);
        }, $res);

        // Prevent &nbsp; being encoded as literal utf-8 characters ("\xC2\xA0" is U+00A0 in UTF-8)
        // Possible alternative solution: http://stackoverflow.com/questions/2142120/php-encoding-with-domdocument
        $res = str_replace("\xC2\xA0", '&nbsp;', $res);

        return $res;
    }

    /**
     * Template rendering uses the serialised content.
     *
     * @see self::getContent()
     * @return string
     */
    public function forTemplate() {
        return $this->getContent();
    }

    /**
     * Get the DOMDocument for the passed content, creating an empty one on first use.
     *
     * @return DOMDocument|false False if this value has been flagged invalid, the DOMDocument
     *                           instance otherwise
     */
    public function getDocument() {
        if (!$this->valid) {
            return false;
        }

        if (!$this->document) {
            $this->document = new DOMDocument('1.0', 'UTF-8');
            $this->document->strictErrorChecking = false;
            $this->document->formatOutput = false;
        }

        return $this->document;
    }

    /**
     * Is this HTMLValue in a usable (non-errored) state?
     *
     * @return bool True unless {@link setInvalid()} was called after the last {@link setDocument()}
     */
    public function isValid() {
        return $this->valid;
    }

    /**
     * Replace the wrapped document and clear any invalid flag.
     *
     * @param DOMDocument $document
     */
    public function setDocument($document) {
        $this->document = $document;
        $this->valid = true;
    }

    /**
     * Flag this value as invalid and drop the wrapped document.
     */
    public function setInvalid() {
        $this->document = null;
        $this->valid = false;
    }

    /**
     * Pass through any missed method calls to DOMDocument (if they exist)
     * so that HTMLValue can be treated mostly like an instance of DOMDocument
     *
     * @param string $method
     * @param array $arguments
     * @return mixed
     */
    public function __call($method, $arguments) {
        $doc = $this->getDocument();

        // $doc is false for an invalid value; method_exists() must only be given an object
        if ($doc && method_exists($doc, $method)) {
            return call_user_func_array(array($doc, $method), $arguments);
        }

        return parent::__call($method, $arguments);
    }

    /**
     * Get the body element, or false if there isn't one (we haven't loaded any content
     * or this instance is in an invalid state)
     *
     * @return \DOMNode|false
     */
    public function getBody() {
        $doc = $this->getDocument();
        if (!$doc) {
            return false;
        }

        $body = $doc->getElementsByTagName('body');
        if (!$body->length) {
            return false;
        }

        return $body->item(0);
    }

    /**
     * Make an xpath query against this HTML
     *
     * @param string $query The xpath query string
     * @return DOMNodeList An empty list if this value is in an invalid state
     */
    public function query($query) {
        $doc = $this->getDocument();

        // DOMXPath cannot be built without a document, so an invalid value matches nothing
        if (!$doc) {
            return new DOMNodeList();
        }

        $xp = new DOMXPath($doc);

        return $xp->query($query);
    }
}
|