"HTMLText", "BigSummary" => "HTMLText", "ContextSummary" => "HTMLText", "FirstParagraph" => "HTMLText", "FirstSentence" => "HTMLText", "LimitCharacters" => "HTMLText", "LimitSentences" => "HTMLText", "Lower" => "HTMLText", "LowerCase" => "HTMLText", "Summary" => "HTMLText", "Upper" => "HTMLText", "UpperCase" => "HTMLText", 'EscapeXML' => 'HTMLText', 'LimitWordCount' => 'HTMLText', 'LimitWordCountXML' => 'HTMLText', 'NoHTML' => 'Text', ); protected $processShortcodes = true; /** * Check if shortcodes are enabled * * @return bool */ public function getProcessShortcodes() { return $this->processShortcodes; } /** * Set shortcodes on or off by default * * @param bool $process * @return $this */ public function setProcessShortcodes($process) { $this->processShortcodes = (bool)$process; return $this; } protected $whitelist = false; public function __construct($name = null, $options = array()) { if(is_string($options)) { $options = array('whitelist' => $options); } return parent::__construct($name, $options); } /** * @param array $options * * Options accepted in addition to those provided by Text: * * - shortcodes: If true, shortcodes will be turned into the appropriate HTML. * If false, shortcodes will not be processed. * * - whitelist: If provided, a comma-separated list of elements that will be allowed to be stored * (be careful on relying on this for XSS protection - some seemingly-safe elements allow * attributes that can be exploited, for instance ) * Text nodes outside of HTML tags are filtered out by default, but may be included by adding * the text() directive. E.g. 'link,meta,text()' will allow only and text at * the root level. */ public function setOptions(array $options = array()) { parent::setOptions($options); if(array_key_exists("shortcodes", $options)) { $this->processShortcodes = !!$options["shortcodes"]; } if(array_key_exists("whitelist", $options)) { if(is_array($options['whitelist'])) { $this->whitelist = $options['whitelist']; } else { $this->whitelist = preg_split('/,\s*/', $options['whitelist']); } } } /** * Create a summary of the content. This will be some section of the first paragraph, limited by * $maxWords. All internal tags are stripped out - the return value is a string * * This is sort of the HTML aware equivilent to Text#Summary, although the logic for summarising is not exactly * the same * * @param int $maxWords Maximum number of words to return - may return less, but never more. Pass -1 for no limit * @param int $flex Number of words to search through when looking for a nice cut point * @param string $add What to add to the end of the summary if we cut at a less-than-ideal cut point * @return string A nice(ish) summary with no html tags (but possibly still some html entities) * * @see framework/core/model/fieldtypes/Text#Summary($maxWords) */ public function Summary($maxWords = 50, $flex = 15, $add = '...') { $str = false; /* First we need the text of the first paragraph, without tags. Try using SimpleXML first */ if (class_exists('SimpleXMLElement')) { $doc = new DOMDocument(); // Catch warnings thrown by loadHTML and turn them into a failure boolean rather than a SilverStripe error set_error_handler(create_function('$no, $str', 'throw new Exception("HTML Parse Error: ".$str);'), E_ALL); // Nonbreaking spaces get converted into weird characters, so strip them $value = str_replace(' ', ' ', $this->RAW()); try { $res = $doc->loadHTML('' . $value); } catch (Exception $e) { $res = false; } restore_error_handler(); if ($res) { $xml = simplexml_import_dom($doc); $res = $xml->xpath('//p'); if (!empty($res)) $str = strip_tags($res[0]->asXML()); } } /* If that failed, most likely the passed HTML is broken. use a simple regex + a custom more brutal strip_tags. * We don't use strip_tags because that does very badly on broken HTML */ if (!$str) { /* See if we can pull a paragraph out*/ // Strip out any images in case there's one at the beginning. Not doing this will return a blank paragraph $str = preg_replace('{^\s*(<.+?>)*]*>}', '', $this->value); if (preg_match('{]*)?>(.*[A-Za-z]+.*)

}', $str, $matches)) $str = $matches[2]; /* If _that_ failed, just use the whole text */ if (!$str) $str = $this->value; /* Now pull out all the html-alike stuff */ /* Take out anything that is obviously a tag */ $str = preg_replace('{]*>}', '', $str); /* Strip out any left over looking bits. Textual < or > should already be encoded to < or > */ $str = preg_replace('{}', '', $str); } /* Now split into words. If we are under the maxWords limit, just return the whole string (re-implode for * whitespace normalization) */ $words = preg_split('/\s+/', $str); if ($maxWords == -1 || count($words) <= $maxWords) return implode(' ', $words); /* Otherwise work backwards for a looking for a sentence ending (we try to avoid abbreviations, but aren't * very good at it) */ for ($i = $maxWords; $i >= $maxWords - $flex && $i >= 0; $i--) { if (preg_match('/\.$/', $words[$i]) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $words[$i])) { return implode(' ', array_slice($words, 0, $i+1)); } } // If we didn't find a sentence ending quickly enough, just cut at the maxWords point and add '...' to the end return implode(' ', array_slice($words, 0, $maxWords)) . $add; } /** * Returns the first sentence from the first paragraph. If it can't figure out what the first paragraph is (or * there isn't one), it returns the same as Summary() * * This is the HTML aware equivilent to Text#FirstSentence * * @see framework/core/model/fieldtypes/Text#FirstSentence() */ public function FirstSentence() { /* Use summary's html processing logic to get the first paragraph */ $paragraph = $this->Summary(-1); /* Then look for the first sentence ending. We could probably use a nice regex, but for now this will do */ $words = preg_split('/\s+/', $paragraph); foreach ($words as $i => $word) { if (preg_match('/(!|\?|\.)$/', $word) && !preg_match('/(Dr|Mr|Mrs|Ms|Miss|Sr|Jr|No)\.$/i', $word)) { return implode(' ', array_slice($words, 0, $i+1)); } } /* If we didn't find a sentence ending, use the summary. We re-call rather than using paragraph so that * Summary will limit the result this time */ return $this->Summary(); } public function RAW() { if ($this->processShortcodes) { return ShortcodeParser::get_active()->parse($this->value); } else { return $this->value; } } /** * Return the value of the field with relative links converted to absolute urls (with placeholders parsed). * @return string */ public function AbsoluteLinks() { return HTTP::absoluteURLs($this->forTemplate()); } public function forTemplate() { return $this->RAW(); } public function prepValueForDB($value) { return parent::prepValueForDB($this->whitelistContent($value)); } /** * Filter the given $value string through the whitelist filter * * @param string $value Input html content * @return string Value with all non-whitelisted content stripped (if applicable) */ public function whitelistContent($value) { if($this->whitelist) { $dom = Injector::inst()->create('HTMLValue', $value); $query = array(); $textFilter = ' | //body/text()'; foreach ($this->whitelist as $tag) { if($tag === 'text()') { $textFilter = ''; // Disable text filter if allowed } else { $query[] = 'not(self::'.$tag.')'; } } foreach($dom->query('//body//*['.implode(' and ', $query).']'.$textFilter) as $el) { if ($el->parentNode) $el->parentNode->removeChild($el); } $value = $dom->getContent(); } return $value; } /** * Returns true if the field has meaningful content. * Excludes null content like

,

,etc * * @return boolean */ public function exists() { // If it's blank, it's blank if(!parent::exists()) { return false; } $value = $this->RAW(); // If it's got a content tag if(preg_match('/<(img|embed|object|iframe|meta|source|link)[^>]*>/i', $value)) { return true; } // If it's just one or two tags on its own (and not the above) it's empty. // This might be

or

or whatever. if(preg_match('/^[\\s]*(<[^>]+>[\\s]*){1,2}$/', $value)) { return false; } // Otherwise its content is genuine content return true; } public function scaffoldFormField($title = null, $params = null) { return new HtmlEditorField($this->name, $title); } public function scaffoldSearchField($title = null, $params = null) { return new TextField($this->name, $title); } }