silverstripe-framework/dev/CSSContentParser.php
Damian Mooyman 0b1f297873 Merge remote-tracking branch 'origin/3.1'
Conflicts:
	.travis.yml
	README.md
	admin/code/LeftAndMain.php
	admin/css/screen.css
	admin/scss/screen.scss
	api/RestfulService.php
	conf/ConfigureFromEnv.php
	control/injector/ServiceConfigurationLocator.php
	control/injector/SilverStripeServiceConfigurationLocator.php
	core/ClassInfo.php
	core/Object.php
	css/AssetUploadField.css
	css/ComplexTableField_popup.css
	dev/CSSContentParser.php
	dev/DevelopmentAdmin.php
	docs/en/changelogs/index.md
	docs/en/misc/contributing/code.md
	docs/en/reference/execution-pipeline.md
	filesystem/GD.php
	filesystem/ImagickBackend.php
	filesystem/Upload.php
	forms/Form.php
	forms/FormField.php
	forms/HtmlEditorConfig.php
	forms/gridfield/GridFieldDetailForm.php
	forms/gridfield/GridFieldSortableHeader.php
	lang/en.yml
	model/Aggregate.php
	model/DataList.php
	model/DataObject.php
	model/DataQuery.php
	model/Image.php
	model/MySQLDatabase.php
	model/SQLQuery.php
	model/fieldtypes/HTMLText.php
	model/fieldtypes/Text.php
	scss/AssetUploadField.scss
	search/filters/SearchFilter.php
	security/Authenticator.php
	security/LoginForm.php
	security/Member.php
	security/MemberAuthenticator.php
	security/MemberLoginForm.php
	security/Security.php
	tests/behat/features/bootstrap/SilverStripe/Framework/Test/Behaviour/CmsFormsContext.php
	tests/control/HTTPTest.php
	tests/control/RequestHandlingTest.php
	tests/filesystem/UploadTest.php
	tests/forms/FormTest.php
	tests/forms/NumericFieldTest.php
	tests/model/DataListTest.php
	tests/model/DataObjectTest.php
	tests/model/TextTest.php
	tests/security/MemberAuthenticatorTest.php
	tests/security/SecurityDefaultAdminTest.php
	tests/view/SSViewerCacheBlockTest.php
	tests/view/SSViewerTest.php
2014-11-18 12:45:54 +13:00

114 lines
3.5 KiB
PHP

<?php
/**
* CSSContentParser enables parsing & assertion running of HTML content via CSS selectors.
* It works by converting the content to XHTML using tidy, rewriting the CSS selectors as XPath queries, and executing
* those using SimpeXML.
*
* It was built to facilitate testing using PHPUnit and contains a number of assert methods that will throw PHPUnit
* assertion exception when applicable.
*
* Tries to use the PHP tidy extension (http://php.net/tidy),
* and falls back to the "tidy" CLI tool. If none of those exists,
* the string is parsed directly without sanitization.
*
* Caution: Doesn't fully support HTML elements like <header>
* due to them being declared illegal by the "tidy" preprocessing step.
*
* @package framework
* @subpackage core
*/
class CSSContentParser extends Object {
protected $simpleXML = null;
public function __construct($content) {
if(extension_loaded('tidy')) {
// using the tidy php extension
$tidy = new tidy();
$tidy->parseString(
$content,
array(
'output-xhtml' => true,
'numeric-entities' => true,
'wrap' => 0, // We need this to be consistent for functional test string comparisons
),
'utf8'
);
$tidy->cleanRepair();
$tidy = str_replace('xmlns="http://www.w3.org/1999/xhtml"','',$tidy);
$tidy = str_replace('&#160;','',$tidy);
} elseif(@shell_exec('which tidy')) {
// using tiny through cli
$CLI_content = escapeshellarg($content);
$tidy = `echo $CLI_content | tidy --force-output 1 -n -q -utf8 -asxhtml -w 0 2> /dev/null`;
$tidy = str_replace('xmlns="http://www.w3.org/1999/xhtml"','',$tidy);
$tidy = str_replace('&#160;','',$tidy);
} else {
// no tidy library found, hence no sanitizing
$tidy = $content;
}
$this->simpleXML = @simplexml_load_string($tidy, 'SimpleXMLElement', LIBXML_NOWARNING);
if(!$this->simpleXML) {
throw new Exception('CSSContentParser::__construct(): Could not parse content.'
. ' Please check the PHP extension tidy is installed.');
}
parent::__construct();
}
/**
* Returns a number of SimpleXML elements that match the given CSS selector.
* Currently the selector engine only supports querying by tag, id, and class.
* See {@link getByXpath()} for a more direct selector syntax.
*
* @param String $selector
* @return SimpleXMLElement
*/
public function getBySelector($selector) {
$xpath = $this->selector2xpath($selector);
return $this->getByXpath($xpath);
}
/**
* Allows querying the content through XPATH selectors.
*
* @param String $xpath SimpleXML compatible XPATH statement
* @return SimpleXMLElement|false
*/
public function getByXpath($xpath) {
return $this->simpleXML->xpath($xpath);
}
/**
* Converts a CSS selector into an equivalent xpath expression.
* Currently the selector engine only supports querying by tag, id, and class.
*
* @param String $selector See {@link getBySelector()}
* @return String XPath expression
*/
public function selector2xpath($selector) {
$parts = preg_split('/\\s+/', $selector);
$xpath = "";
foreach($parts as $part) {
if(preg_match('/^([A-Za-z][A-Za-z0-9]*)/', $part, $matches)) {
$xpath .= "//$matches[1]";
} else {
$xpath .= "//*";
}
$xfilters = array();
if(preg_match('/#([^#.\[]+)/', $part, $matches)) {
$xfilters[] = "@id='$matches[1]'";
}
if(preg_match('/\.([^#.\[]+)/', $part, $matches)) {
$xfilters[] = "contains(@class,'$matches[1]')";
}
if($xfilters) $xpath .= '[' . implode(',', $xfilters) . ']';
}
return $xpath;
}
}