2007-07-19 12:40:28 +02:00
|
|
|
<?php
|
2008-02-25 03:10:37 +01:00
|
|
|
/**
|
|
|
|
* Represents a large text field that contains HTML content.
|
2010-10-15 01:58:09 +02:00
|
|
|
* This behaves similarly to {@link Text}, but the template processor won't escape any HTML content within it.
|
2014-08-15 08:53:05 +02:00
|
|
|
*
|
2010-10-15 01:58:09 +02:00
|
|
|
* @see HTMLVarchar
|
|
|
|
* @see Text
|
|
|
|
* @see Varchar
|
2014-08-15 08:53:05 +02:00
|
|
|
*
|
2012-04-12 08:02:46 +02:00
|
|
|
* @package framework
|
2008-02-25 03:10:37 +01:00
|
|
|
* @subpackage model
|
|
|
|
*/
|
2007-07-19 12:40:28 +02:00
|
|
|
class HTMLText extends Text {
|
2013-03-21 19:48:54 +01:00
|
|
|
/**
 * Escape type declared for values of this field type.
 *
 * @var string
 */
private static $escape_type = 'xml';

/**
 * Helper name => field class name. Every helper listed here is declared as producing
 * HTMLText, except NoHTML, which is declared as producing plain Text.
 * NOTE(review): presumably read by the framework's casting mechanism - confirm against Text / ViewableData.
 *
 * @var array
 */
private static $casting = array(
	'AbsoluteLinks' => 'HTMLText',
	'BigSummary' => 'HTMLText',
	'ContextSummary' => 'HTMLText',
	'FirstParagraph' => 'HTMLText',
	'FirstSentence' => 'HTMLText',
	'LimitCharacters' => 'HTMLText',
	'LimitSentences' => 'HTMLText',
	'Lower' => 'HTMLText',
	'LowerCase' => 'HTMLText',
	'Summary' => 'HTMLText',
	'Upper' => 'HTMLText',
	'UpperCase' => 'HTMLText',
	'EscapeXML' => 'HTMLText',
	'LimitWordCount' => 'HTMLText',
	'LimitWordCountXML' => 'HTMLText',
	'NoHTML' => 'Text',
);

/**
 * When true, forTemplate() runs the value through the active ShortcodeParser.
 * Controlled by the "shortcodes" option of setOptions().
 *
 * @var boolean
 */
protected $processShortcodes = true;

/**
 * List of allowed directives (element names and/or "text()") applied by whitelistContent(),
 * or false when no whitelist filtering takes place.
 * Controlled by the "whitelist" option of setOptions().
 *
 * @var array|false
 */
protected $whitelist = false;
|
|
|
|
|
|
|
|
/**
 * Builds the field. $options is normally an options array, but a bare string is also
 * accepted, in which case it is treated as the value of the "whitelist" option.
 *
 * @param string $name Passed through unchanged to the parent constructor
 * @param array|string $options Options array, or a whitelist string
 */
public function __construct($name = null, $options = array()) {
	// A string in place of the options array is shorthand for array('whitelist' => <string>)
	$normalisedOptions = is_string($options) ? array('whitelist' => $options) : $options;

	return parent::__construct($name, $normalisedOptions);
}
|
|
|
|
|
|
|
|
/**
 * @param array $options
 *
 * Options accepted in addition to those provided by Text:
 *
 *   - shortcodes: If true, shortcodes will be turned into the appropriate HTML.
 *                 If false, shortcodes will not be processed.
 *
 *   - whitelist: If provided, a comma-separated list (or an array) of elements that will be allowed
 *                to be stored (be careful on relying on this for XSS protection - some seemingly-safe
 *                elements allow attributes that can be exploited, for instance
 *                <img onload="exploiting_code();" src="..." />)
 *                Text nodes outside of HTML tags are filtered out by default, but may be included by adding
 *                the text() directive. E.g. 'link,meta,text()' will allow only <link /> <meta /> and text at
 *                the root level.
 *                An empty value (empty string, false, null or empty array) disables whitelist filtering.
 */
public function setOptions(array $options = array()) {
	parent::setOptions($options);

	if(array_key_exists('shortcodes', $options)) {
		$this->processShortcodes = (bool)$options['shortcodes'];
	}

	if(array_key_exists('whitelist', $options)) {
		$whitelist = $options['whitelist'];

		if(!is_array($whitelist)) {
			// Split on commas, tolerating whitespace on either side of each entry, and drop blank
			// entries. Without PREG_SPLIT_NO_EMPTY an empty string (or false/null) would become
			// array(''), which is truthy and would later produce an invalid XPath expression.
			$whitelist = preg_split('/\s*,\s*/', trim((string)$whitelist), -1, PREG_SPLIT_NO_EMPTY);
		}

		// An empty list (or a failed split) means "no whitelist"
		$this->whitelist = $whitelist ? $whitelist : false;
	}
}
|
|
|
|
|
2007-07-19 12:40:28 +02:00
|
|
|
/**
 * Create a summary of the content. This will be some section of the first paragraph, limited by
 * $maxWords. All internal tags are stripped out - the return value is a string
 *
 * This is sort of the HTML aware equivalent to Text#Summary, although the logic for summarising is not exactly
 * the same
 *
 * The first paragraph is extracted with DOMDocument/SimpleXML when both are available; if that yields
 * nothing (e.g. broken HTML), a regex based fallback is used instead.
 *
 * @param int $maxWords Maximum number of words to return - may return less, but never more. Pass -1 for no limit
 * @param int $flex Number of words to search through when looking for a nice cut point
 * @param string $add What to add to the end of the summary if we cut at a less-than-ideal cut point
 * @return string A nice(ish) summary with no html tags (but possibly still some html entities)
 *
 * @see framework/core/model/fieldtypes/Text#Summary($maxWords)
 */
public function Summary($maxWords = 50, $flex = 15, $add = '...') {
	$str = false;

	/* First we need the text of the first paragraph, without tags. Try using DOM + SimpleXML first */
	if (class_exists('SimpleXMLElement') && class_exists('DOMDocument')) {
		$doc = new DOMDocument();

		// Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe error.
		// A closure is used because create_function() no longer exists in PHP 8.
		set_error_handler(function($errno, $errstr) {
			throw new Exception("HTML Parse Error: ".$errstr);
		}, E_ALL);

		// Nonbreaking spaces get converted into weird characters, so replace both the entity and the
		// literal UTF-8 encoded character (the document is declared as utf-8 below) with a plain space
		$value = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $this->value);

		try {
			$res = $doc->loadHTML('<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>' . $value);
		}
		catch (Exception $e) { $res = false; }
		restore_error_handler();

		if ($res) {
			$xml = simplexml_import_dom($doc);
			$res = $xml->xpath('//p');
			if (!empty($res)) $str = strip_tags($res[0]->asXML());
		}
	}

	/* If that failed, most likely the passed HTML is broken. use a simple regex + a custom more brutal strip_tags.
	 * We don't use strip_tags because that does very badly on broken HTML */
	if (!$str) {
		/* See if we can pull a paragraph out*/

		// Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph
		$str = preg_replace('{^\s*(<.+?>)*<img[^>]*>}', '', $this->value);
		if (preg_match('{<p(\s[^<>]*)?>(.*[A-Za-z]+.*)</p>}', $str, $matches)) $str = $matches[2];

		/* If _that_ failed, just use the whole text */
		if (!$str) $str = $this->value;

		/* Now pull out all the html-alike stuff */
		/* Take out anything that is obviously a tag */
		$str = preg_replace('{</?[a-zA-Z]+[^<>]*>}', '', $str);
		/* Strip out any left over looking bits. Textual < or > should already be encoded to &lt; or &gt; */
		$str = preg_replace('{</|<|>}', '', $str);
	}

	/* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for
	 * whitespace normalization). The text is trimmed first so that leading/trailing whitespace doesn't produce
	 * empty "words" that count against the limit */
	$words = preg_split('/\s+/', trim((string)$str));
	if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words);

	/* Otherwise work backwards for a looking for a sentence ending (we try to avoid abbreviations, but aren't
	 * very good at it). $words[$i] is the ($i + 1)th word, so the search starts at $maxWords - 1: starting at
	 * $maxWords could return $maxWords + 1 words */
	for ($i = $maxWords - 1; $i >= $maxWords - $flex && $i >= 0; $i--) {
		if (preg_match('/\.$/', $words[$i]) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $words[$i])) {
			return implode(' ', array_slice($words, 0, $i+1));
		}
	}

	// If we didn't find a sentence ending quickly enough, just cut at the maxWords point and add '...' to the end
	return implode(' ', array_slice($words, 0, $maxWords)) . $add;
}
|
2014-08-15 08:53:05 +02:00
|
|
|
|
2009-06-17 13:36:49 +02:00
|
|
|
/**
 * Returns the first sentence from the first paragraph. If no sentence ending can be found in the
 * text returned by Summary(-1), the result is the same as calling Summary() with its defaults.
 *
 * This is the HTML aware equivalent to Text#FirstSentence
 *
 * @see framework/core/model/fieldtypes/Text#FirstSentence()
 */
public function FirstSentence() {
	// Summary(-1) applies no word limit, so this is the whole tag-free first paragraph
	$tokens = preg_split('/\s+/', $this->Summary(-1));

	foreach ($tokens as $position => $token) {
		// Only tokens ending in '!', '?' or '.' can close a sentence
		if (!preg_match('/(!|\?|\.)$/', $token)) continue;

		// A trailing dot on a common abbreviation is not treated as a sentence ending
		if (preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $token)) continue;

		return implode(' ', array_slice($tokens, 0, $position + 1));
	}

	// No sentence ending found: call Summary() again (rather than reusing the unlimited text)
	// so that its default word limit applies
	return $this->Summary();
}
|
|
|
|
|
2009-10-11 02:06:58 +02:00
|
|
|
/**
 * Returns the field value for output: the raw value when shortcode processing is switched
 * off, otherwise the value as parsed by the active ShortcodeParser.
 *
 * @return string
 */
public function forTemplate() {
	if (!$this->processShortcodes) {
		return $this->value;
	}

	return ShortcodeParser::get_active()->parse($this->value);
}
|
2014-02-16 23:22:26 +01:00
|
|
|
|
|
|
|
/**
 * Runs the value through whitelistContent() and hands the result to the parent
 * implementation of prepValueForDB().
 *
 * @param string $value Input html content
 * @return mixed Whatever the parent prepValueForDB() returns for the filtered value
 */
public function prepValueForDB($value) {
	$filtered = $this->whitelistContent($value);

	return parent::prepValueForDB($filtered);
}
|
2014-08-15 08:53:05 +02:00
|
|
|
|
2014-04-09 04:48:02 +02:00
|
|
|
/**
 * Filter the given $value string through the whitelist filter
 *
 * Every element below <body> that is not named in the whitelist is removed. Text nodes directly
 * below <body> are removed too, unless the whitelist contains the text() directive. A whitelist
 * consisting only of text() removes all elements. Blank whitelist entries are ignored; if the
 * whitelist contains no usable entry at all, the value is returned unfiltered.
 *
 * @param string $value Input html content
 * @return string Value with all non-whitelisted content stripped (if applicable)
 */
public function whitelistContent($value) {
	if(!$this->whitelist) return $value;

	$conditions = array();
	$textFilter = ' | //body/text()';
	$hasDirective = false;

	foreach($this->whitelist as $tag) {
		$tag = trim((string)$tag);

		// A blank entry would produce the invalid expression "not(self::)"
		if($tag === '') continue;

		$hasDirective = true;
		if($tag === 'text()') {
			$textFilter = ''; // Disable text filter if allowed
		} else {
			$conditions[] = 'not(self::'.$tag.')';
		}
	}

	// Nothing usable in the whitelist: behave as if no whitelist was configured
	if(!$hasDirective) return $value;

	// Only add a predicate when there are element conditions; "//body//*[]" is not valid XPath.
	// Without a predicate (whitelist is just "text()") every element is selected for removal.
	$elementFilter = '//body//*';
	if($conditions) $elementFilter .= '['.implode(' and ', $conditions).']';

	$dom = Injector::inst()->create('HTMLValue', $value);

	foreach($dom->query($elementFilter.$textFilter) as $el) {
		// A node may already have been detached along with a removed ancestor
		if($el->parentNode) $el->parentNode->removeChild($el);
	}

	return $dom->getContent();
}
|
|
|
|
|
2011-12-12 06:44:47 +01:00
|
|
|
/**
 * Returns true if the field has meaningful content.
 * Excludes null content like <h1></h1>, <p></p> ,etc
 *
 * @return boolean
 */
public function exists() {
	// Nothing at all according to the parent check: definitely empty
	if(!parent::exists()) return false;

	// Any of these tags counts as content on its own, even without text
	$hasContentTag = preg_match('/<(img|embed|object|iframe|meta|source|link)[^>]*>/i', $this->value);
	if($hasContentTag) return true;

	// One or two bare tags surrounded only by whitespace (e.g. <p></p> or <h1></h1>) is empty;
	// anything else is treated as genuine content
	$isBareTagsOnly = preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/', $this->value);

	return !$isBareTagsOnly;
}
|
2014-08-15 08:53:05 +02:00
|
|
|
|
2008-10-14 00:20:41 +02:00
|
|
|
/**
 * Provides the form field used to edit this value: an HtmlEditorField built from this
 * field's name and the given title.
 *
 * @param string $title Title passed to the form field
 * @param array $params Accepted but not used by this implementation
 * @return HtmlEditorField
 */
public function scaffoldFormField($title = null, $params = null) {
	$editorField = new HtmlEditorField($this->name, $title);

	return $editorField;
}
|
2014-08-15 08:53:05 +02:00
|
|
|
|
2012-03-27 06:04:11 +02:00
|
|
|
/**
 * Provides the form field used to search on this value: a plain TextField built from this
 * field's name and the given title.
 *
 * @param string $title Title passed to the form field
 * @param array $params Accepted but not used by this implementation
 * @return TextField
 */
public function scaffoldSearchField($title = null, $params = null) {
	$searchField = new TextField($this->name, $title);

	return $searchField;
}
|
2007-07-19 12:40:28 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2012-02-12 21:22:11 +01:00
|
|
|
|