* due to them being declared illegal by the "tidy" preprocessing step.
 */
class CSSContentParser
{
    use Injectable;

    /**
     * Parsed document that every selector / XPath query runs against.
     * Stays null until the constructor has parsed the content successfully.
     *
     * @var SimpleXMLElement|null
     */
    protected $simpleXML = null;

    /**
     * Cleans up the given markup with "tidy" (PHP extension first, then the
     * command line binary, otherwise no clean-up at all) and parses the
     * result with SimpleXML.
     *
     * @param string $content Markup to parse
     * @throws Exception If the (tidied) content cannot be parsed as XML
     */
    public function __construct($content)
    {
        if (extension_loaded('tidy')) {
            $markup = $this->tidyWithExtension($content);
        } elseif (@shell_exec('which tidy')) {
            $markup = $this->tidyWithCli($content);
        } else {
            // No tidy library found, hence no sanitizing.
            $markup = $content;
        }

        $document = @simplexml_load_string($markup, 'SimpleXMLElement', LIBXML_NOWARNING);

        // Compare against false explicitly: a SimpleXMLElement without
        // children or attributes (e.g. "<p/>") is falsy in a boolean context
        // although it was parsed successfully.
        if ($document === false) {
            throw new Exception('CSSContentParser::__construct(): Could not parse content.'
                . ' Please check the PHP extension tidy is installed.');
        }

        $this->simpleXML = $document;
    }

    /**
     * Returns a number of SimpleXML elements that match the given CSS selector.
     * Currently the selector engine only supports querying by tag, id, and class.
     * See {@link getByXpath()} for a more direct selector syntax.
     *
     * @param string $selector
     * @return SimpleXMLElement[] Whatever {@link getByXpath()} returns for the converted selector
     */
    public function getBySelector($selector)
    {
        return $this->getByXpath($this->selector2xpath($selector));
    }

    /**
     * Allows querying the content through XPATH selectors.
     *
     * The result of SimpleXMLElement::xpath() is passed through untouched, so
     * a non-array value comes back when the expression cannot be evaluated.
     *
     * @param string $xpath SimpleXML compatible XPATH statement
     * @return SimpleXMLElement[]
     */
    public function getByXpath($xpath)
    {
        return $this->simpleXML->xpath($xpath);
    }

    /**
     * Converts a CSS selector into an equivalent xpath expression.
     * Currently the selector engine only supports querying by tag, id, and class.
     *
     * Every whitespace separated part of the selector becomes one descendant
     * step ("//tag" or "//*"). An id and any number of classes on that part
     * are combined into a single predicate with "and". Classes are matched
     * with contains(), i.e. as a substring of the class attribute.
     *
     * @param string $selector See {@link getBySelector()}
     * @return string XPath expression
     */
    public function selector2xpath($selector)
    {
        // Trim first so leading/trailing whitespace does not yield an empty
        // part, which would add a spurious "//*" step.
        $parts = preg_split('/\\s+/', trim((string) $selector));
        $xpath = '';

        foreach ($parts as $part) {
            // A leading tag name selects that element, anything else selects any element.
            if (preg_match('/^([A-Za-z][A-Za-z0-9]*)/', $part, $tagMatch)) {
                $xpath .= '//' . $tagMatch[1];
            } else {
                $xpath .= '//*';
            }

            $xfilters = [];
            if (preg_match('/#([^#.\[]+)/', $part, $idMatch)) {
                $xfilters[] = "@id='{$idMatch[1]}'";
            }
            // Collect every class of a compound selector such as "div.a.b".
            if (preg_match_all('/\.([^#.\[]+)/', $part, $classMatches)) {
                foreach ($classMatches[1] as $className) {
                    $xfilters[] = "contains(@class,'{$className}')";
                }
            }

            if ($xfilters) {
                // XPath 1.0 has no comma operator inside a predicate; the
                // conditions have to be combined with "and".
                $xpath .= '[' . implode(' and ', $xfilters) . ']';
            }
        }

        return $xpath;
    }

    /**
     * Runs the content through the tidy PHP extension.
     *
     * @param string $content Raw markup
     * @return string Cleaned XHTML markup
     */
    private function tidyWithExtension($content)
    {
        $tidy = new tidy();
        $tidy->parseString(
            $content,
            [
                'output-xhtml' => true,
                'numeric-entities' => true,
                // We need this to be consistent for functional test string comparisons
                'wrap' => 0,
            ],
            'utf8'
        );
        $tidy->cleanRepair();

        // Fetch the repaired markup explicitly rather than relying on the
        // tidy object being converted to a string implicitly.
        return $this->stripTidyArtifacts(tidy_get_output($tidy));
    }

    /**
     * Runs the content through the tidy command line binary.
     *
     * @param string $content Raw markup
     * @return string Cleaned XHTML markup, empty when the command produced no output
     */
    private function tidyWithCli($content)
    {
        $cliContent = escapeshellarg($content);
        $output = shell_exec(
            "echo $cliContent | tidy --force-output 1 -n -q -utf8 -asxhtml -w 0 2> /dev/null"
        );

        // shell_exec() does not always return a string; normalise to one
        // before handing the value to str_replace().
        return $this->stripTidyArtifacts((string) $output);
    }

    /**
     * Removes the pieces of tidy's output that are unwanted before parsing:
     * the default XHTML namespace declaration (presumably so that unprefixed
     * XPath element names match - confirm) and non-breaking spaces. The latter
     * are searched for as "&#160;" because tidy is run with numeric entities
     * enabled in both code paths.
     *
     * @param string $markup Output produced by tidy
     * @return string
     */
    private function stripTidyArtifacts($markup)
    {
        return str_replace(
            ['xmlns="http://www.w3.org/1999/xhtml"', '&#160;'],
            '',
            $markup
        );
    }
}