Merge branch '3.1'

2024-10-22 14:05:37 +02:00 · 2013-03-15 21:58:37 +11:00 · 2013-03-15 21:58:37 +11:00 · bc941c18b6
commit bc941c18b6
parent ce66bc1eaf 6ae931df24
71 changed files with 1602 additions and 7145 deletions
--- a/_config/html.yml
+++ b/_config/html.yml
@ -0,0 +1,3 @@
 Injector:
  HTMLValue:
    class: SS_HTML4Value
--- a/admin/code/LeftAndMain.php
+++ b/admin/code/LeftAndMain.php
@ -354,7 +354,18 @@ class LeftAndMain extends Controller implements PermissionProvider {
 	}
 	public function handleRequest(SS_HTTPRequest $request, DataModel $model = null) {
-		$response = parent::handleRequest($request, $model);
+		try {
 			$response = parent::handleRequest($request, $model);
 		} catch(ValidationException $e) {
 			// Nicer presentation of model-level validation errors
 			$msgs = _t('LeftAndMain.ValidationError', 'Validation error') . ': ' 
 				. $e->getResult()->message();
 			$e = new SS_HTTPResponse_Exception($msgs, 403);
 			$e->getResponse()->addHeader('Content-Type', 'text/plain');
 			$e->getResponse()->addHeader('X-Status', rawurlencode($msgs));
 			throw $e;
 		}
 		$title = $this->Title();
 		if(!$response->getHeader('X-Controller')) $response->addHeader('X-Controller', $this->class);
 		if(!$response->getHeader('X-Title')) $response->addHeader('X-Title', urlencode($title));
@ -1346,19 +1357,67 @@ class LeftAndMain extends Controller implements PermissionProvider {
 	/**
 	 * Return the version number of this application.
-	 * Uses the subversion path information in <mymodule>/silverstripe_version
+	 * Uses the number in <mymodule>/silverstripe_version
-	 * (automacially replaced by build scripts).
+	 * (automatically replaced by build scripts).
-	 * 
+	 * If silverstripe_version is empty,
 	 * then attempts to get it from composer.lock
 	 *
 	 * @return string
 	 */
 	public function CMSVersion() {
-		$frameworkVersion = file_get_contents(FRAMEWORK_PATH . '/silverstripe_version');
+		$versions = array();
-		if(!$frameworkVersion) $frameworkVersion = _t('LeftAndMain.VersionUnknown', 'Unknown');
+		$modules = array(
-		
+			'silverstripe/framework' => array(
-		return sprintf(
+				'title' => 'Framework',
-			"Framework: %s",
+				'versionFile' => FRAMEWORK_PATH . '/silverstripe_version',
-			$frameworkVersion
+			)
 		);
 		// The CMS module is optional; only report it when its path constant is defined
 		if(defined('CMS_PATH')) {
 			$modules['silverstripe/cms'] = array(
 				'title' => 'CMS',
 				'versionFile' => CMS_PATH . '/silverstripe_version',
 			);
 		}
 		// Tries to obtain version number from composer.lock if it exists
 		$composerLockPath = BASE_PATH . '/composer.lock';
 		if (file_exists($composerLockPath)) {
 			$cache = SS_Cache::factory('LeftAndMain_CMSVersion');
 			// Keyed on the lock file's mtime, so a changed composer.lock is re-read
 			$cacheKey = filemtime($composerLockPath);
 			// The map is stored as a JSON string (see save() below), so a cache hit has to be
 			// decoded back into an array before it can be indexed by package name.
 			$cached = $cache->load($cacheKey);
 			$versions = $cached ? json_decode($cached, true) : array();
 			if(!is_array($versions)) $versions = array();
 			if(!$versions && $jsonData = file_get_contents($composerLockPath)) {
 				$lockData = json_decode($jsonData);
 				if($lockData && isset($lockData->packages)) {
 					foreach ($lockData->packages as $package) {
 						if(
 							array_key_exists($package->name, $modules)
 							&& isset($package->version)
 						) {
 							$versions[$package->name] = $package->version;
 						}
 					}
 					$cache->save(json_encode($versions), $cacheKey);
 				}
 			}
 		}
 		// Fall back to static version file
 		foreach($modules as $moduleName => $moduleSpec) {
 			if(!isset($versions[$moduleName])) {
 				if($staticVersion = file_get_contents($moduleSpec['versionFile'])) {
 					$versions[$moduleName] = $staticVersion;
 				} else {
 					$versions[$moduleName] = _t('LeftAndMain.VersionUnknown', 'Unknown');
 				}
 			}
 		}
 		// Render as "Title: version" pairs in the order the modules were declared
 		$out = array();
 		foreach($modules as $moduleName => $moduleSpec) {
 			$out[] = $modules[$moduleName]['title'] . ': ' . $versions[$moduleName];
 		}
 		return implode(', ', $out);
 	}
 	/**
--- a/admin/css/screen.css
+++ b/admin/css/screen.css
@ -373,7 +373,7 @@ body.cms { overflow: hidden; }
 /** -------------------------------------------- Tabs -------------------------------------------- */
 .ui-tabs { padding: 0; background: none; }
 .ui-tabs .ui-tabs { position: static; }
-.ui-tabs .ui-tabs-panel { padding: 16px; background: transparent; border: 0; }
+.ui-tabs .ui-tabs-panel { padding: 24px; background: transparent; border: 0; }
 .ui-tabs .ui-tabs-panel.cms-edit-form { padding: 0; }
 .ui-tabs .ui-widget-header { border: 0; background: none; }
 .ui-tabs .ui-tabs-nav { float: right; margin: 16px 0 -1px 0; padding: 0 12px 0 0; border-bottom: none; }
@ -409,7 +409,7 @@ body.cms { overflow: hidden; }
 .ui-tabs.cms-tabset-primary .ui-tabs-nav li a, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary li a, .ui-tabs .cms-content-header-tabs .ui-tabs-nav li a { margin: 0; line-height: 39px; }
 .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-corner-all, .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-corner-top, .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-corner-right, .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-corner-tr, .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-corner-tl, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-corner-all, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-corner-top, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-corner-right, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-corner-tr, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-corner-tl, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-corner-all, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-corner-top, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-corner-right, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-corner-tr, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-corner-tl { border-radius: 0; }
 .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-state-default, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-state-default, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-state-default { -webkit-box-shadow: rgba(201, 205, 206, 0.8) 0 0 2px; -moz-box-shadow: rgba(201, 205, 206, 0.8) 0 0 2px; box-shadow: rgba(201, 205, 206, 0.8) 0 0 2px; background-color: #b0bec7; background-image: url('data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4gPHN2ZyB2ZXJzaW9uPSIxLjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJncmFkIiBncmFkaWVudFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgeDE9IjUwJSIgeTE9IjAlIiB4Mj0iNTAlIiB5Mj0iMTAwJSI+PHN0b3Agb2Zmc2V0PSIwJSIgc3RvcC1jb2xvcj0iI2Q0ZGJlMCIvPjxzdG9wIG9mZnNldD0iMTAwJSIgc3RvcC1jb2xvcj0iI2IwYmVjNyIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHg9IjAiIHk9IjAiIHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InVybCgjZ3JhZCkiIC8+PC9zdmc+IA=='); background-size: 100%; background-image: -webkit-gradient(linear, 50% 0%, 50% 100%, color-stop(0%, #d4dbe0), color-stop(100%, #b0bec7)); background-image: -webkit-linear-gradient(#d4dbe0, #b0bec7); background-image: -moz-linear-gradient(#d4dbe0, #b0bec7); background-image: -o-linear-gradient(#d4dbe0, #b0bec7); background-image: linear-gradient(#d4dbe0, #b0bec7); border-top: none; border-right-color: #8399a7; border-left-color: #ced7dc; }
-.ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-state-active, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-state-active, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-state-active { -webkit-box-shadow: none; -moz-box-shadow: none; box-shadow: none; background: #e6eaed; background: #eceff1; border-top: none; border-right-color: #b3b3b3; border-left-color: #eceff1; z-index: 2; }
+.ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-state-active, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-state-active, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-state-active { -webkit-box-shadow: none; -moz-box-shadow: none; box-shadow: none; background: #e6eaed; border-top: none; border-right-color: #b3b3b3; border-left-color: #eceff1; z-index: 2; }
 .ui-tabs.cms-tabset-primary .ui-tabs-nav .ui-state-active a, .ui-tabs .ui-tabs-nav.cms-tabset-nav-primary .ui-state-active a, .ui-tabs .cms-content-header-tabs .ui-tabs-nav .ui-state-active a { border-bottom: none; }
 .cms-content-header-tabs { float: right; }
@ -488,7 +488,7 @@ body.cms { overflow: hidden; }
 .cms-content-tools { background: #eceff1; width: 200px; overflow-y: auto; overflow-x: hidden; z-index: 70; border-right: 1px solid #c0c0c2; -webkit-box-shadow: rgba(248, 248, 248, 0.9) -1px 0 0 inset, 0 0 1px rgba(201, 205, 206, 0.8); -moz-box-shadow: rgba(248, 248, 248, 0.9) -1px 0 0 inset, 0 0 1px rgba(201, 205, 206, 0.8); box-shadow: rgba(248, 248, 248, 0.9) -1px 0 0 inset, 0 0 1px rgba(201, 205, 206, 0.8); float: left; position: relative; }
 .cms-content-tools.filter { padding: 0 !important; }
 .cms-content-tools .cms-panel-header { clear: both; margin: 0 0 7px; line-height: 24px; border-bottom: 1px solid #d0d3d5; -webkit-box-shadow: 0 1px 0 rgba(248, 248, 248, 0.9); -moz-box-shadow: 0 1px 0 rgba(248, 248, 248, 0.9); -o-box-shadow: 0 1px 0 rgba(248, 248, 248, 0.9); box-shadow: 0 1px 0 rgba(248, 248, 248, 0.9); }
-.cms-content-tools .cms-panel-content { width: 184px; padding: 16px 8px 0; overflow: auto; height: 100%; }
+.cms-content-tools .cms-panel-content { width: 184px; padding: 8.8px 8px 0; overflow: auto; height: 100%; }
 .cms-content-tools .cms-panel-content .Actions .ss-ui-action-constructive { margin-right: 5px; }
 .cms-content-tools .cms-content-header { background-color: #748d9d; background-image: url('data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4gPHN2ZyB2ZXJzaW9uPSIxLjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJncmFkIiBncmFkaWVudFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgeDE9IjUwJSIgeTE9IjAlIiB4Mj0iNTAlIiB5Mj0iMTAwJSI+PHN0b3Agb2Zmc2V0PSIwJSIgc3RvcC1jb2xvcj0iI2IwYmVjNyIvPjxzdG9wIG9mZnNldD0iMTAwJSIgc3RvcC1jb2xvcj0iIzc0OGQ5ZCIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHg9IjAiIHk9IjAiIHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InVybCgjZ3JhZCkiIC8+PC9zdmc+IA=='); background-size: 100%; background-image: -webkit-gradient(linear, 50% 0%, 50% 100%, color-stop(0%, #b0bec7), color-stop(100%, #748d9d)); background-image: -webkit-linear-gradient(#b0bec7, #748d9d); background-image: -moz-linear-gradient(#b0bec7, #748d9d); background-image: -o-linear-gradient(#b0bec7, #748d9d); background-image: linear-gradient(#b0bec7, #748d9d); }
 .cms-content-tools .cms-content-header h2 { text-shadow: #5c7382 -1px -1px 0; width: 176px; color: white; overflow: hidden; white-space: nowrap; text-overflow: ellipsis; -o-text-overflow: ellipsis; }
@ -556,7 +556,7 @@ form.member-profile-form #Permissions .optionset li { float: none; width: auto;
 .memberdatetimeoptionset .toggle { font-size: 11px; }
 .cms .cms-content { border-right: 1px solid #BBB; -webkit-border-radius: 0; -moz-border-radius: 0; -ms-border-radius: 0; -o-border-radius: 0; border-radius: 0; background: #eceff1; width: 800px; z-index: 40; }
-.cms .cms-content-fields { overflow-y: auto; overflow-x: auto; background: none; width: 100%; }
+.cms .cms-content-fields { overflow-y: auto; overflow-x: auto; background: #e6eaed; width: 100%; }
 .cms .cms-content-fields #Root_Main .confirmedpassword { border-bottom: none; box-shadow: none; }
 .cms .cms-content-fields #Root_Main .customFormat { max-width: 80px; }
 .cms .cms-content-fields #Root_Main .cms-help-toggle { text-indent: -9999em; display: inline-block; width: 20px; background: url(../images/question.png) no-repeat 0px 0px; }
@ -682,7 +682,7 @@ form.small .cms-file-info-data .field .middleColumn { margin-left: 120px; }
 .members_grid p button#action_export span.btn-icon-download-csv { height: 17px; }
 /** Import forms */
-body.SecurityAdmin { background-color: #eceff1; }
+body.SecurityAdmin { background-color: #e6eaed; }
 form.import-form ul { list-style: disc; }
 form.import-form ul li { margin-left: 20px; }
@ -821,7 +821,7 @@ li.class-ErrorPage > a a .jstree-pageicon { background-position: 0 -112px; }
 .cms-logo a { position: absolute; top: 8px; bottom: 8px; display: block; width: 24px; background: url("../images/logo_small.png") no-repeat left center; text-indent: -9999em; padding: 0 1px; left: 0; }
 .cms-logo span { font-weight: bold; font-size: 12px; line-height: 16px; padding: 2px 0; margin-left: 30px; }
-.cms-login-status { border-top: 1px solid #19435c; padding: 12px 0 17px; line-height: 16px; font-size: 11px; }
+.cms-login-status { border-top: 1px solid #19435c; padding: 8px 0 9.6px; line-height: 16px; font-size: 11px; }
 .cms-login-status .logout-link { display: inline-block; height: 16px; width: 16px; float: left; margin: 0 8px 0 5px; background: url('../images/sprites-32x32-sf6890c994e.png') 0 -292px no-repeat; text-indent: -9999em; }
 .cms-menu { z-index: 80; background: #b0bec7; width: 160px; -webkit-box-shadow: rgba(0, 0, 0, 0.9) 0 0 3px; -moz-box-shadow: rgba(0, 0, 0, 0.9) 0 0 3px; box-shadow: rgba(0, 0, 0, 0.9) 0 0 3px; }
--- a/admin/javascript/LeftAndMain.Tree.js
+++ b/admin/javascript/LeftAndMain.Tree.js
@ -96,6 +96,7 @@
 							$.ajax({
 								'url': self.data('urlSavetreenode'),
 								'type': 'POST',
 								'data': {
 									ID: nodeID, 
 									ParentID: newParentID,
--- a/admin/javascript/LeftAndMain.js
+++ b/admin/javascript/LeftAndMain.js
@ -144,6 +144,7 @@ jQuery.noConflict();
 				$('.ss-loading-screen').hide();
 				$('body').removeClass('loading');
 				$(window).unbind('resize', positionLoadingSpinner);
 				this.restoreTabState();
 				this._super();
 			},
@ -169,7 +170,8 @@ jQuery.noConflict();
 			 */
 			'from .cms-menu-list li a': {
 				onclick: function(e) {
-					if(e.which > 1) return;
+					var href = $(e.target).attr('href');
 					if(e.which > 1 || href == this._tabStateUrl()) return;
 					this.splitViewMode();
 				}
 			},
--- a/admin/scss/_menu.scss
+++ b/admin/scss/_menu.scss
@ -67,7 +67,7 @@
 .cms-login-status {
 	border-top: 1px solid $color-dark-separator;
-	padding: $grid-y*1.5 0 17px;
+	padding: $grid-y*1 0 $grid-y*1.2;
 	line-height: 16px;
 	font-size: $font-base-size - 1;
--- a/admin/scss/_style.scss
+++ b/admin/scss/_style.scss
@ -159,7 +159,7 @@ body.cms {
 	}
 	.ui-tabs-panel {
-		padding: $grid-x*2;
+		padding: $grid-x*3;
 		background: transparent; // default it's white
 		border: 0; // suppress default borders
 		&.cms-edit-form {
@ -309,8 +309,7 @@ body.cms {
 	.ui-state-active {
 		@include box-shadow(none);
-		background: darken($color-widget-bg, 2%);
+		background:darken($tab-panel-texture-color,2%);	
 		background: $tab-panel-texture-color;
 		border-top: none;
 		border: {
 			right-color: darken($color-tab, 15%); // same color as divider between header and body, needed for IE
@ -692,7 +691,7 @@ body.cms {
 	.cms-panel-content {
 		width: ($grid-x * 23);
-		padding: $grid-x*2 $grid-x 0; // smaller left/right padding to use space efficiently
+		padding: $grid-x*1.1 $grid-x 0; // smaller left/right padding to use space efficiently
 		overflow: auto;
 		height:100%;
@ -1024,7 +1023,7 @@ form.member-profile-form {
 		// can trigger longer pages and the extra scroll bar doesn't fire our sizing bar
 		overflow-y: auto;
 		overflow-x: auto;
-		background: none;
+		background:darken($tab-panel-texture-color,2%);	
 		width:100%;
 		#Root_Main {
 			.confirmedpassword {
@ -1596,7 +1595,7 @@ form.small {
 */
 body.SecurityAdmin {
-	background-color: $tab-panel-texture-color; //adds background to import members/groups iframe
+	background-color: darken($tab-panel-texture-color,2%); //adds background to import members/groups iframe
 }
 form.import-form {
--- a/api/RestfulService.php
+++ b/api/RestfulService.php
@ -298,6 +298,8 @@ class RestfulService extends ViewableData {
 	protected function extractResponse($ch, $rawResponse) {
 		//get the status code
 		$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
 		//get a curl error if there is one
 		$curlError = curl_error($ch);
 		//normalise the status code
 		if(curl_error($ch) !== '' || $statusCode == 0) $statusCode = 500;
 		//calculate the length of the header and extract it
@ -588,7 +590,7 @@ class RestfulService_Response extends SS_HTTPResponse {
 			$this->cachedResponse = new RestfulService_Response($content);
 		}
 		else {
-			$this->cachedResponse->setBody = $content;
+			$this->cachedResponse->setBody($content);
 		}
 	}
--- a/control/ContentNegotiator.php
+++ b/control/ContentNegotiator.php
@ -28,6 +28,8 @@
 */
 class ContentNegotiator {
 	protected static $content_type = '';
 	protected static $encoding = 'utf-8';
 	protected static $enabled = false;
@ -122,48 +124,56 @@ class ContentNegotiator {
 		$negotiator->$chosenFormat( $response );
 	}
-	/**
+	/** 
-	 * Only sends the HTTP Content-Type as "application/xhtml+xml"
+	 * Check user defined content type and use it, if it's empty use the strict application/xhtml+xml.
-	 * if the template starts with the typical "<?xml" Pragma.
+	 * Replaces a few common tags and entities with their XHTML representations (<br>, <img>, &nbsp;
-	 * Assumes that a correct doctype is set, and doesn't change or append to it.
+	 * <input>, checked, selected).
 	 * Replaces a few common tags and entities with their XHTML representations (<br>, <img>, &nbsp;).
 	 *
 	 * @param $response SS_HTTPResponse
 	 * @return string
-	 * @todo More flexible tag and entity parsing through regular expressions or tag definition lists
+	 * @todo Search for more xhtml replacement
 	 */
 	public function xhtml(SS_HTTPResponse $response) {
 		$content = $response->getBody();
-		
+
-		// Only serve "pure" XHTML if the XML header is present
+		$contentType = Config::inst()->get('ContentNegotiator', 'content_type');
-		if(substr($content,0,5) == '<' . '?xml' ) {
+		if (empty($contentType)) {
 			$response->addHeader("Content-Type", "application/xhtml+xml; charset=" . self::$encoding);
 			$response->addHeader("Vary" , "Accept");
 			// Fix base tag
 			$content = preg_replace('/<base href="([^"]*)"><!--\[if[[^\]*]\] \/><!\[endif\]-->/', 
 				'<base href="$1" />', $content);
 			$content = str_replace('&nbsp;','&#160;', $content);
 			$content = str_replace('<br>','<br />', $content);
 			$content = preg_replace('#(<img[^>]*[^/>])>#i', '\\1/>', $content);
 			$response->setBody($content);
 		} else {
-			return $this->html($response);
+			$response->addHeader("Content-Type", $contentType . "; charset=" . self::$encoding);
 		}
 		$response->addHeader("Vary" , "Accept");
 		// Fix base tag
 		$content = preg_replace('/<base href="([^"]*)"><!--\[if[[^\]*]\] \/><!\[endif\]-->/', 
 			'<base href="$1" />', $content);
 		$content = str_replace('&nbsp;','&#160;', $content);
 		$content = str_replace('<br>','<br />', $content);
 		$content = str_replace('<hr>','<hr />', $content);
 		$content = preg_replace('#(<img[^>]*[^/>])>#i', '\\1/>', $content);
 		$content = preg_replace('#(<input[^>]*[^/>])>#i', '\\1/>', $content);
 		$content = preg_replace("#(\<option[^>]*[\s]+selected)(?!\s*\=)#si", "$1=\"selected\"$2", $content);
 		$content = preg_replace("#(\<input[^>]*[\s]+checked)(?!\s*\=)#si", "$1=\"checked\"$2", $content);
 		$response->setBody($content);
 	}
 	/*
-	 * Sends HTTP Content-Type as "text/html", and replaces existing doctypes with
+	 * Check user defined content type and use it, if it's empty use the text/html.
-	 * HTML4.01 Strict.
+	 * If find a XML header replaces it and existing doctypes with HTML4.01 Strict.
 	 * Replaces self-closing tags like <img /> with unclosed solitary tags like <img>.
 	 * Replaces all occurrences of "application/xhtml+xml" with "text/html" in the template.
 	 * Removes "xmlns" attributes and any <?xml> Pragmas.
 	 */
 	public function html(SS_HTTPResponse $response) {
-		$response->addHeader("Content-Type", "text/html; charset=" . self::$encoding);
+		
 		$contentType = Config::inst()->get('ContentNegotiator', 'content_type');
 		if (empty($contentType)) {
 			$response->addHeader("Content-Type", "text/html; charset=" . self::$encoding);
 		} else {
 			$response->addHeader("Content-Type", $contentType . "; charset=" . self::$encoding);
 		}
 		$response->addHeader("Vary", "Accept");
 		$content = $response->getBody();
--- a/control/Controller.php
+++ b/control/Controller.php
@ -157,9 +157,9 @@ class Controller extends RequestHandler implements TemplateGlobalProvider {
 					. "returning it without modification.");
 			}
 			$this->response = $body;
-			
+
 		} else {
-			if(is_object($body)) {
+			if($body instanceof Object && $body->hasMethod('getViewer')) {
 				if(isset($_REQUEST['debug_request'])) {
 					Debug::message("Request handler $body->class object to $this->class controller;"
 						. "rendering with template returned by $body->class::getViewer()");
--- a/control/HTTPRequest.php
+++ b/control/HTTPRequest.php
@ -104,6 +104,24 @@ class SS_HTTPRequest implements ArrayAccess {
 	 */
 	public function __construct($httpMethod, $url, $getVars = array(), $postVars = array(), $body = null) {
 		// Resolve the effective HTTP verb from the requested method and the raw POST vars
 		$detectedMethod = self::detect_method($httpMethod, $postVars);
 		$this->httpMethod = strtoupper($detectedMethod);

 		// URL handling is delegated to setUrl()
 		$this->setUrl($url);

 		// Store both variable sets as arrays, whatever type was passed in
 		$this->getVars = (array)$getVars;
 		$this->postVars = (array)$postVars;

 		$this->body = $body;
 	}
 	/**
 	 * Allow the setting of a URL
 	 *
 	 * This is here so that RootURLController can change the URL of the request
 	 * without us losing all the other info attached (like headers)
 	 *
 	 * @param string The new URL
 	 *
 	 * @return SS_HTTPRequest The updated request
 	 */
 	public function setUrl($url) {
 		$this->url = $url;
 		// Normalize URL if its relative (strictly speaking), or has leading slashes
@ -116,10 +134,8 @@ class SS_HTTPRequest implements ArrayAccess {
 		}
 		if($this->url) $this->dirParts = preg_split('|/+|', $this->url);
 		else $this->dirParts = array();
-		
+
-		$this->getVars = (array)$getVars;
+		return $this;
 		$this->postVars = (array)$postVars;
 		$this->body = $body;
 	}
 	/**
--- a/control/HTTPResponse.php
+++ b/control/HTTPResponse.php
@ -159,7 +159,7 @@ class SS_HTTPResponse {
 	 * @return SS_HTTPRequest $this
 	 */
 	public function setBody($body) {
-		$this->body = $body;
+		$this->body = $body ? (string)$body : $body; // Don't type-cast false-ish values, eg null is null not ''
 	}
 	/**
--- a/control/PjaxResponseNegotiator.php
+++ b/control/PjaxResponseNegotiator.php
@ -79,7 +79,8 @@ class PjaxResponseNegotiator {
 		// Execute the fragment callbacks and build the response.
 		foreach($fragments as $fragment) {
 			if(isset($callbacks[$fragment])) {
-				$responseParts[$fragment] = call_user_func($callbacks[$fragment]);
+				$res = call_user_func($callbacks[$fragment]);
 				$responseParts[$fragment] = $res ? (string)$res : $res;
 			} else {
 				throw new SS_HTTPResponse_Exception("X-Pjax = '$fragment' not supported for this URL.", 400);
 			}
--- a/core/Config.php
+++ b/core/Config.php
@ -164,12 +164,43 @@ class Config {
 	}
 	/**
-	 * Empty construction, otherwise calling singleton('Config') (not the right way to get the current active config
+	 * Make the newly active Config be a copy of the current active Config instance.
-	 * instance, but people might) gives an error
+	 *
 	 * You can then make changes to the configuration by calling update and remove on the new
 	 * value returned by Config::inst(), and then discard those changes later by calling unnest
 	 */
 	static public function nest() {
 		// Clone the active instance and remember where the clone came from,
 		// so unnest() can switch back to it later
 		$copy = clone self::$instance;
 		$copy->nestedFrom = self::$instance;

 		// The clone becomes the active Config from here on
 		self::set_instance($copy);
 	}
 	/**
 	 * Change the active Config back to the Config instance the current active Config object
 	 * was copied from
 	 */
 	static public function unnest() {
 		// nestedFrom is set by nest() and defaults to null. Without this guard, calling
 		// unnest() on a Config that was never nested would pass null to set_instance().
 		$previous = self::$instance->nestedFrom;
 		if ($previous === null) {
 			user_error(
 				'Unable to unnest root Config, please make sure you call nest() before unnest()',
 				E_USER_WARNING
 			);
 			return;
 		}
 		self::set_instance($previous);
 	}
 	protected $cache;
 	/**
 	 * Each copy of the Config object needs its own cache, so changes don't leak through to other instances
 	 */
 	public function __construct() {
 		// Start every Config with a fresh, empty Config_LRU cache
 		$this->cache = new Config_LRU();
 	}
 	/**
 	 * Called by PHP when a Config is cloned (e.g. by Config::nest()).
 	 *
 	 * Replaces the cache reference copied from the source instance with a clone of it, so the
 	 * copy no longer shares the same Config_LRU object as the instance it was cloned from.
 	 * NOTE(review): how deep that copy goes depends on Config_LRU's own clone behaviour - confirm.
 	 */
 	public function __clone() {
 		$this->cache = clone $this->cache;
 	}
 	/** @var Config - The config instance this one was copied from when Config::nest() was called */
 	protected $nestedFrom = null;
 	/** @var [array] - Array of arrays. Each member is a nested array keyed as $class => $name => $value,
 	 * where value is a config value to treat as the highest priority item */
 	protected $overrides = array();
@ -178,6 +209,13 @@ class Config {
 	 * where value is a config value suppress from any lower priority item */
 	protected $suppresses = array();
 	protected $staticManifests = array();
 	/**
 	 * Register a static manifest ahead of any manifests already registered.
 	 *
 	 * The whole value cache is cleaned afterwards.
 	 *
 	 * @param SS_ConfigStaticManifest $manifest
 	 */
 	public function pushConfigStaticManifest(SS_ConfigStaticManifest $manifest) {
 		// Prepend, so the newest manifest sits at index 0
 		$this->staticManifests = array_merge(array($manifest), $this->staticManifests);

 		$this->cache->clean();
 	}
 	/** @var [array] - The list of settings pulled from config files to search through */
 	protected $manifests = array();
@ -187,8 +225,9 @@ class Config {
 	 * WARNING: Config manifests do not merge entries, and do not solve before/after rules inter-manifest -
 	 * instead, the last manifest to be added always wins
 	 */
-	public function pushConfigManifest(SS_ConfigManifest $manifest) {
+	public function pushConfigYamlManifest(SS_ConfigManifest $manifest) {
 		array_unshift($this->manifests, $manifest->yamlConfig);
 		$this->cache->clean();
 		// @todo: Do anything with these. They're for caching after config.php has executed
 		$this->collectConfigPHPSettings = true;
@ -342,34 +381,17 @@ class Config {
 		return $res;
 	}
-	/**
+	protected $extraConfigSources = array();
-	 * Get the config value associated for a given class and property
+
-	 *
+	public function extraConfigSourcesChanged($class) {
-	 * This merges all current sources and overrides together to give final value
+		unset($this->extraConfigSources[$class]);
-	 * todo: Currently this is done every time. This function is an inner loop function, so we really need to be
+		$this->cache->clean("__{$class}");
-	 * caching heavily here.
+	}
-	 *
+
-	 * @param $class string - The name of the class to get the value for
+	protected function getUncached($class, $name, $sourceOptions, &$result, $suppress, &$tags) {
-	 * @param $name string - The property to get the value for
+		$tags[] = "__{$class}";
-	 * @param int $sourceOptions Bitmask which can be set to some combintain of Config::UNINHERITED,
+		$tags[] = "__{$class}__{$name}";
-	 *                           Config::FIRST_SET, and Config::EXCLUDE_EXTENSIONS.
+
 	 *                           
 	 *   Config::UNINHERITED does not include parent classes when merging configuration fragments
 	 *   Config::FIRST_SET stops inheriting once the first class that sets a value (even an empty value) is encoutered
 	 *   Config::EXCLUDE_EXTRA_SOURCES does not include any additional static sources (such as extensions)
 	 *
 	 *   Config::INHERITED is a utility constant that can be used to mean "none of the above", equvilient to 0
 	 *   Setting both Config::UNINHERITED and Config::FIRST_SET behaves the same as just Config::UNINHERITED
 	 *
 	 * should the parent classes value be merged in as the lowest priority source?
 	 * @param $result array|scalar Reference to a variable to put the result in. Also returned, so this can be left
 	 *                             as null safely. If you do pass a value, it will be treated as the highest priority
 	 *                             value in the result chain
 	 * @param $suppress array Internal use when called by child classes. Array of mask pairs to filter value by
 	 * @return array|scalar The value of the config item, or null if no value set. Could be an associative array,
 	 *                      sequential array or scalar depending on value (see class docblock)
 	 */
 	public function get($class, $name, $sourceOptions = 0, &$result = null, $suppress = null) {
 		// If result is already not something to merge into, just return it
 		if ($result !== null && !is_array($result)) return $result;
@ -397,19 +419,32 @@ class Config {
 			}
 		}
 		// Then look at the static variables
 		$nothing = new stdClass();
 		$sources = array($class);
 		// Include extensions only if not flagged not to, and some have been set
 		if (($sourceOptions & self::EXCLUDE_EXTRA_SOURCES) != self::EXCLUDE_EXTRA_SOURCES) {
-			$extraSources = Object::get_extra_config_sources($class);
+			// If we don't have a fresh list of extra sources, get it from the class itself
 			if (!array_key_exists($class, $this->extraConfigSources)) {
 				$this->extraConfigSources[$class] = Object::get_extra_config_sources($class);
 			}
 			// Update $sources with any extra sources
 			$extraSources = $this->extraConfigSources[$class];
 			if ($extraSources) $sources = array_merge($sources, $extraSources);
 		}
 		$value = $nothing = null;
 		foreach ($sources as $staticSource) {
-			if (is_array($staticSource)) $value = isset($staticSource[$name]) ? $staticSource[$name] : $nothing;
+			if (is_array($staticSource)) {
-			else $value = Object::static_lookup($staticSource, $name, $nothing);
+				$value = isset($staticSource[$name]) ? $staticSource[$name] : $nothing;
 			}
 			else {
 				foreach ($this->staticManifests as $i => $statics) {
 					$value = $statics->get($staticSource, $name, $nothing);
 					if ($value !== $nothing) break;
 				}
 			}
 			if ($value !== $nothing) {
 				self::merge_low_into_high($result, $value, $suppress);
@ -418,14 +453,53 @@ class Config {
 		}
 		// Finally, merge in the values from the parent class
-		if (($sourceOptions & self::UNINHERITED) != self::UNINHERITED 
+		if (
-				&& (($sourceOptions & self::FIRST_SET) != self::FIRST_SET || $result === null)) {
+			($sourceOptions & self::UNINHERITED) != self::UNINHERITED &&
 			(($sourceOptions & self::FIRST_SET) != self::FIRST_SET || $result === null)
 		) {
 			$parent = get_parent_class($class);
-			if ($parent) $this->get($parent, $name, $sourceOptions, $result, $suppress);
+			if ($parent) $this->getUncached($parent, $name, $sourceOptions, $result, $suppress, $tags);
 		}
-		if ($name == 'routes') {
+		return $result;
-			print_r($result); die;
+	}
 	/**
 	 * Get the config value associated for a given class and property
 	 *
 	 * This merges all current sources and overrides together to give final value
 	 * todo: Currently this is done every time. This function is an inner loop function, so we really need to be
 	 * caching heavily here.
 	 *
 	 * @param $class string - The name of the class to get the value for
 	 * @param $name string - The property to get the value for
 	 * @param int $sourceOptions Bitmask which can be set to some combination of Config::UNINHERITED,
 	 *                           Config::FIRST_SET, and Config::EXCLUDE_EXTRA_SOURCES.
 	 *
 	 *   Config::UNINHERITED does not include parent classes when merging configuration fragments
 	 *   Config::FIRST_SET stops inheriting once the first class that sets a value (even an empty value) is encountered
 	 *   Config::EXCLUDE_EXTRA_SOURCES does not include any additional static sources (such as extensions)
 	 *
 	 *   Config::INHERITED is a utility constant that can be used to mean "none of the above", equivalent to 0
 	 *   Setting both Config::UNINHERITED and Config::FIRST_SET behaves the same as just Config::UNINHERITED
 	 *
 	 * should the parent classes value be merged in as the lowest priority source?
 	 * @param $result array|scalar Reference to a variable to put the result in. Also returned, so this can be left
 	 *                             as null safely. If you do pass a value, it will be treated as the highest priority
 	 *                             value in the result chain
 	 * @param $suppress array Internal use when called by child classes. Array of mask pairs to filter value by
 	 * @return array|scalar The value of the config item, or null if no value set. Could be an associative array,
 	 *                      sequential array or scalar depending on value (see class docblock)
 	 */
 	public function get($class, $name, $sourceOptions = 0, &$result = null, $suppress = null) {
 		// Have we got a cached value? Use it if so
 		$key = $class.$name.$sourceOptions;
 		if (($result = $this->cache->get($key)) === false) {
 			$tags = array();
 			$result = null;
 			$this->getUncached($class, $name, $sourceOptions, $result, $suppress, $tags);
 			$this->cache->set($key, $result, $tags);
 		}
 		return $result;
@ -452,6 +526,8 @@ class Config {
 		if (!isset($this->overrides[0][$class][$name])) $this->overrides[0][$class][$name] = $val;
 		else self::merge_high_into_low($this->overrides[0][$class][$name], $val);
 		$this->cache->clean("__{$class}__{$name}");
 	}
 	/**
@ -512,6 +588,91 @@ class Config {
 }
 /**
 * Fixed-size key/value cache with least-recently-used style eviction.
 *
 * Entries live in a pre-allocated SplFixedArray of SIZE reusable stdClass slots, and
 * $indexing maps each cached key to its slot number. Every write and every hit stamps
 * the slot with an ever-increasing counter ($c); eviction picks a slot whose stamp is
 * old enough (see set()).
 *
 * Note that get() signals a miss by returning false, so a stored value of false cannot
 * be told apart from a miss.
 */
 class Config_LRU {
 	/** Number of slots in the cache */
 	const SIZE = 1000;
 	/** @var SplFixedArray Slots; each is a stdClass with key, value, tags and c (last-use stamp) */
 	protected $cache;
 	/** @var array Map of cached key => slot number */
 	protected $indexing;
 	/** @var int Slot number used by the most recent insert; the next scan starts from here */
 	protected $i = 0;
 	/** @var int Use counter, incremented on every write and every hit */
 	protected $c = 0;
 	public function __construct() {
 		$this->cache = new SplFixedArray(self::SIZE);
 		// Pre-fill with stdClass instances. By reusing we avoid object-thrashing
 		for ($i = 0; $i < self::SIZE; $i++) {
 			$this->cache[$i] = new stdClass();
 			$this->cache[$i]->key = null;
 		}
 		$this->indexing = array();
 	}
 	/**
 	 * Store a value under a key, evicting an old entry if no slot is free.
 	 *
 	 * @param string $key
 	 * @param mixed $val
 	 * @param array $tags Tags that clean($tag) can later use to invalidate this entry
 	 */
 	public function set($key, $val, $tags = array()) {
 		// If the key is already cached, refresh its slot in place. Allocating a second slot would
 		// leave a stale duplicate behind, and evicting that duplicate later would remove the index
 		// entry for the live value, turning it into an unreachable slot.
 		if (isset($this->indexing[$key])) {
 			$existing = $this->cache[$this->indexing[$key]];
 			$existing->value = $val;
 			$existing->tags = $tags;
 			$existing->c = ++$this->c;
 			return;
 		}
 		// Find an index to set at
 		$replacing = null;
 		// Target count - not always the lowest, but guaranteed to exist (or hit an empty item)
 		$target = $this->c - self::SIZE + 1;
 		$i = $stop = $this->i;
 		// Walk backwards (wrapping at 0) from the last insert position
 		do {
 			if (!($i--)) $i = self::SIZE-1;
 			$item = $this->cache[$i];
 			if ($item->key === null) { $replacing = null; break; }
 			else if ($item->c <= $target) { $replacing = $item; break; }
 		}
 		while ($i != $stop);
 		if ($replacing) unset($this->indexing[$replacing->key]);
 		$this->indexing[$key] = $this->i = $i;
 		$obj = $this->cache[$i];
 		$obj->key = $key;
 		$obj->value = $val;
 		$obj->tags = $tags;
 		$obj->c = ++$this->c;
 	}
 	private $hit = 0;
 	private $miss = 0;
 	/**
 	 * @return float|int Ratio of hits to misses so far, or 0 if there have been no misses
 	 */
 	public function stats() {
 		return $this->miss ? ($this->hit / $this->miss) : 0;
 	}
 	/**
 	 * Fetch a cached value and mark its slot as freshly used.
 	 *
 	 * @param string $key
 	 * @return mixed The cached value, or false when the key is not cached
 	 */
 	public function get($key) {
 		if (isset($this->indexing[$key])) {
 			$this->hit++;
 			$res = $this->cache[$this->indexing[$key]];
 			$res->c = ++$this->c;
 			return $res->value;
 		}
 		$this->miss++;
 		return false;
 	}
 	/**
 	 * Invalidate cached entries.
 	 *
 	 * @param string|null $tag If truthy, only entries carrying this tag are dropped; otherwise
 	 *                         the whole cache is emptied
 	 */
 	public function clean($tag = null) {
 		if ($tag) {
 			foreach ($this->cache as $i => $v) {
 				// Strict comparison so a tag never matches through type juggling
 				if ($v->key !== null && in_array($tag, $v->tags, true)) {
 					unset($this->indexing[$v->key]);
 					$this->cache[$i]->key = null;
 				}
 			}
 		}
 		else {
 			for ($i = 0; $i < self::SIZE; $i++) $this->cache[$i]->key = null;
 			$this->indexing = array();
 		}
 	}
 }
 class Config_ForClass {
 	protected $class;
--- a/core/Core.php
+++ b/core/Core.php
@ -254,7 +254,10 @@ require_once 'dev/Backtrace.php';
 require_once 'dev/ZendLog.php';
 require_once 'dev/Log.php';
 require_once 'filesystem/FileFinder.php';
 require_once 'core/manifest/ManifestCache.php';
 require_once 'core/manifest/ClassLoader.php';
 require_once 'core/manifest/ConfigManifest.php';
 require_once 'core/manifest/ConfigStaticManifest.php';
 require_once 'core/manifest/ClassManifest.php';
 require_once 'core/manifest/ManifestFileFinder.php';
 require_once 'core/manifest/TemplateLoader.php';
@ -288,9 +291,13 @@ if(file_exists(BASE_PATH . '/vendor/autoload.php')) {
 	require_once BASE_PATH . '/vendor/autoload.php';
 }
 // Now that the class manifest is up, load the configuration
 $configManifest = new SS_ConfigStaticManifest(BASE_PATH, false, $flush);
 Config::inst()->pushConfigStaticManifest($configManifest);
 // Now that the class manifest is up, load the configuration
 $configManifest = new SS_ConfigManifest(BASE_PATH, false, $flush);
-Config::inst()->pushConfigManifest($configManifest);
+Config::inst()->pushConfigYamlManifest($configManifest);
 SS_TemplateLoader::instance()->pushManifest(new SS_TemplateManifest(
 	BASE_PATH, project(), false, isset($_GET['flush'])
--- a/core/Diff.php
+++ b/core/Diff.php
@ -686,7 +686,7 @@ class Diff
 			$content = $cleaner->cleanHTML($content);
 		} else {
 			// At most basic level of cleaning, use DOMDocument to save valid XML.
-			$doc = new SS_HTMLValue($content);
+			$doc = Injector::inst()->create('HTMLValue', $content);
 			$content = $doc->getContent();
 		}
--- a/core/HTMLCleaner.php
+++ b/core/HTMLCleaner.php
@ -69,7 +69,7 @@ class PurifierHTMLCleaner extends HTMLCleaner {
 	public function cleanHTML($content) {
 		$html = new HTMLPurifier();
-		$doc = new SS_HTMLValue($html->purify($content));
+		$doc = Injector::inst()->create('HTMLValue', $html->purify($content));
 		return $doc->getContent();
 	}
 }
--- a/core/Object.php
+++ b/core/Object.php
@ -52,23 +52,12 @@ abstract class Object {
 	 */
 	public $class;
 	/**
 	 * @todo Set this via dependancy injection? Can't call it $config, because too many clashes with form elements etc
 	 * @var Config_ForClass
 	 */
 	private $_config_forclass = null;
 	/**
 	 * Get a configuration accessor for this class. Short hand for Config::inst()->get($this->class, .....).
 	 * @return Config_ForClass|null
 	 */
-	public function config() {
+	static public function config() {
-		if (!$this->_config_forclass) {
+		return Config::inst()->forClass(get_called_class());
 			$this->_config_forclass = Config::inst()->forClass($this->class);
 		}
 		return $this->_config_forclass;
 	}
 	/**
@ -494,10 +483,11 @@ abstract class Object {
 		if($subclasses) foreach($subclasses as $subclass) {
 			unset(self::$classes_constructed[$subclass]);
 			unset(self::$extra_methods[$subclass]);
 			unset(self::$extension_sources[$subclass]);
 		}
 		Config::inst()->update($class, 'extensions', array($extension));
 		Config::inst()->extraConfigSourcesChanged($class);
 		Injector::inst()->unregisterAllObjects();
 		// load statics now for DataObject classes
@ -534,6 +524,7 @@ abstract class Object {
 		}
 		Config::inst()->remove($class, 'extensions', Config::anything(), $extension);
 		Config::inst()->extraConfigSourcesChanged($class);
 		// unset singletons to avoid side-effects
 		Injector::inst()->unregisterAllObjects();
@ -544,7 +535,6 @@ abstract class Object {
 		if($subclasses) foreach($subclasses as $subclass) {
 			unset(self::$classes_constructed[$subclass]);
 			unset(self::$extra_methods[$subclass]);
 			unset(self::$extension_sources[$subclass]);
 		}
 	}
@ -571,9 +561,6 @@ abstract class Object {
 	// --------------------------------------------------------------------------------------------------------------
 	private static $extension_sources = array();
 	// Don't bother checking some classes that should never be extended
 	private static $unextendable_classes = array('Object', 'ViewableData', 'RequestHandler');
 	static public function get_extra_config_sources($class = null) {
@ -582,9 +569,6 @@ abstract class Object {
 		// If this class is unextendable, NOP
 		if(in_array($class, self::$unextendable_classes)) return;
 		// If we have a pre-cached version, use that
 		if(array_key_exists($class, self::$extension_sources)) return self::$extension_sources[$class];
 		// Variable to hold sources in
 		$sources = null;
@ -615,7 +599,7 @@ abstract class Object {
 			}
 		}
-		return self::$extension_sources[$class] = $sources;
+		return $sources;
 	}
 	public function __construct() {
--- a/core/manifest/ClassManifest.php
+++ b/core/manifest/ClassManifest.php
@ -117,11 +117,10 @@ class SS_ClassManifest {
 		$this->base  = $base;
 		$this->tests = $includeTests;
-		$this->cache = SS_Cache::factory('SS_ClassManifest', 'Core', array(
+		$cacheClass = defined('SS_MANIFESTCACHE') ? SS_MANIFESTCACHE : 'ManifestCache_File';
-			'automatic_serialization' => true,
+
-			'lifetime' => null
+		$this->cache = new $cacheClass('classmanifest'.($includeTests ? '_tests' : ''));
-		));
+		$this->cacheKey = 'manifest';
 		$this->cacheKey = $this->tests ? 'manifest_tests' : 'manifest';
 		if (!$forceRegen && $data = $this->cache->load($this->cacheKey)) {
 			$this->classes      = $data['classes'];
@ -343,7 +342,7 @@ class SS_ClassManifest {
 			$interfaces = self::get_interface_parser()->findAll($tokens);
 			$cache = array('classes' => $classes, 'interfaces' => $interfaces, 'namespace' => $namespace);
-			$this->cache->save($cache, $key, array('fileparse'));
+			$this->cache->save($cache, $key);
 		}
 		foreach ($classes as $class) {
--- a/core/manifest/ConfigStaticManifest.php
+++ b/core/manifest/ConfigStaticManifest.php
@ -0,0 +1,334 @@
 <?php
 /**
 * A utility class which builds a manifest of the statics defined in all classes, along with their
 * access levels and values
 *
 * We use this to make the statics that the Config system uses as default values be truly immutable.
 *
 * It has the side effect of allowing Config to avoid private-level access restrictions, so we can
 * optionally catch attempts to modify the config statics (otherwise the modification will appear
 * to work, but won't actually have any effect - the equivalent of failing silently)
 *
 * @subpackage manifest
 */
 class SS_ConfigStaticManifest {
 	/** @var string The manifest base path */
 	protected $base;
 	/** @var bool Whether "tests" directories are included */
 	protected $tests;
 	/** @var object Cache backend; class named by SS_MANIFESTCACHE, or ManifestCache_File if that is undefined */
 	protected $cache;
 	/** @var string Cache key for the index: sha1 of the base path */
 	protected $key;
 	/** @var array Class name => info (path, mtime, key), plus a '$statics' entry holding the initial classes */
 	protected $index;
 	/** @var array Class name => (static name => array('access' => ..., 'value' => ...)), or false if unknown */
 	protected $statics;
 	// Classes whose statics are stored directly in the index rather than in a per-class cache entry
 	static protected $initial_classes = array(
 		'Object', 'ViewableData', 'Injector', 'Director'
 	);
 	/**
 	 * Constructs and initialises a new config static manifest, either loading the data
 	 * from the cache or re-scanning for classes.
 	 *
 	 * @param string $base The manifest base path.
 	 * @param bool   $includeTests Include the contents of "tests" directories.
 	 * @param bool   $forceRegen Force the manifest to be regenerated.
 	 * @param bool   $cache If the manifest is regenerated, cache it.
 	 */
 	public function __construct($base, $includeTests = false, $forceRegen = false, $cache = true) {
 		$this->base  = $base;
 		$this->tests = $includeTests;
 		$cacheClass = defined('SS_MANIFESTCACHE') ? SS_MANIFESTCACHE : 'ManifestCache_File';
 		$this->cache = new $cacheClass('staticmanifest'.($includeTests ? '_tests' : ''));
 		$this->key = sha1($base);
 		if(!$forceRegen) {
 			$this->index = $this->cache->load($this->key);
 		}
 		if($this->index) {
 			$this->statics = $this->index['$statics'];
 		}
 		else {
 			$this->regenerate($cache);
 		}
 	}
 	/**
 	 * Look up the declared default value of a class static.
 	 *
 	 * Statics for a class are loaded lazily: first from the per-class cache entry, then by
 	 * re-parsing the file recorded in the index. Classes that cannot be resolved are remembered
 	 * as false so they are not looked up again.
 	 *
 	 * @param string $class
 	 * @param string $name Name of the static, without the leading "$"
 	 * @param mixed $default Returned when the class or static is not in the manifest
 	 * @return mixed
 	 */
 	public function get($class, $name, $default) {
 		if (!isset($this->statics[$class])) {
 			if (isset($this->index[$class])) {
 				$info = $this->index[$class];
 				if (isset($info['key']) && $details = $this->cache->load($this->key.'_'.$info['key'])) {
 					$this->statics += $details;
 				}
 				if (!isset($this->statics[$class])) {
 					$this->handleFile(null, $info['path'], null);
 				}
 				// The file may no longer declare statics for this class; remember that so the
 				// file is not re-parsed on every subsequent lookup
 				if (!isset($this->statics[$class])) {
 					$this->statics[$class] = false;
 				}
 			}
 			else {
 				$this->statics[$class] = false;
 			}
 		}
 		if (isset($this->statics[$class][$name])) {
 			$static = $this->statics[$class][$name];
 			if ($static['access'] != T_PRIVATE) {
 				Deprecation::notice('3.1.0', "Config static $class::\$$name must be marked as private", Deprecation::SCOPE_GLOBAL);
 				// Don't warn more than once per static. This must be written to the stored entry;
 				// $static is only a local copy
 				$this->statics[$class][$name]['access'] = T_PRIVATE;
 			}
 			return $static['value'];
 		}
 		return $default;
 	}
 	/**
 	 * Completely regenerates the manifest file.
 	 *
 	 * @param bool $cache Whether to write the regenerated index and per-class statics to the cache
 	 */
 	public function regenerate($cache = true) {
 		$this->index = array('$statics' => array());
 		$this->statics = array();
 		$finder = new ManifestFileFinder();
 		$finder->setOptions(array(
 			'name_regex'    => '/^([^_].*\.php)$/',
 			'ignore_files'  => array('index.php', 'main.php', 'cli-script.php'),
 			'ignore_tests'  => !$this->tests,
 			'file_callback' => array($this, 'handleFile')
 		));
 		$finder->find($this->base);
 		if($cache) {
 			$keysets = array();
 			foreach ($this->statics as $class => $details) {
 				// Initial classes travel with the index; every other class gets its own cache entry
 				if (in_array($class, self::$initial_classes)) {
 					$this->index['$statics'][$class] = $details;
 				}
 				else {
 					$key = sha1($class);
 					$this->index[$class]['key'] = $key;
 					$keysets[$key][$class] = $details;
 				}
 			}
 			foreach ($keysets as $key => $details) {
 				$this->cache->save($details, $this->key.'_'.$key);
 			}
 			$this->cache->save($this->index, $this->key);
 		}
 	}
 	/**
 	 * File finder callback: parse one PHP file and merge its class info and statics into the manifest.
 	 *
 	 * @param string|null $basename Unused
 	 * @param string $pathname Path of the file to parse
 	 * @param int|null $depth Unused
 	 */
 	public function handleFile($basename, $pathname, $depth) {
 		$parser = new SS_ConfigStaticManifest_Parser($pathname);
 		$parser->parse();
 		$this->index = array_merge($this->index, $parser->getInfo());
 		$this->statics = array_merge($this->statics, $parser->getStatics());
 	}
 	/**
 	 * @return array All statics loaded so far, keyed by class name
 	 */
 	public function getStatics() {
 		return $this->statics;
 	}
 }
 /**
 * A parser that processes a PHP file, using PHP's built in parser to get a string of tokens,
 * then processing them to find the static class variables, their access levels & values
 *
 * We can't do this using TokenisedRegularExpression because we need to keep track of state
 * as we process the token list (when we enter and leave a namespace or class, when we see
 * an access level keyword, etc)
 */
 class SS_ConfigStaticManifest_Parser {
 	/** @var array Class name => array('path' => ..., 'mtime' => ...) for each class with statics */
 	protected $info = array();
 	/** @var array Class name => (static name => array('access' => ..., 'value' => ...)) */
 	protected $statics = array();
 	/** @var string Path of the file being parsed */
 	protected $path;
 	/** @var array Token list as returned by token_get_all() */
 	protected $tokens;
 	/** @var int Number of tokens */
 	protected $length;
 	/** @var int Index of the next token to hand out */
 	protected $pos;
 	/**
 	 * @param string $path Path of the PHP file to tokenise
 	 */
 	public function __construct($path) {
 		$this->path = $path;
 		$file = file_get_contents($path);
 		$this->tokens = token_get_all($file);
 		$this->length = count($this->tokens);
 		$this->pos = 0;
 	}
 	/**
 	 * @return array Path and mtime info for each class found, keyed by class name
 	 */
 	public function getInfo() {
 		return $this->info;
 	}
 	/**
 	 * @return array The statics found, keyed by class name then static name
 	 */
 	public function getStatics() {
 		return $this->statics;
 	}
 	/**
 	 * Get the next token to process, incrementing the pointer
 	 *
 	 * @param bool $ignoreWhitespace - if true will skip any whitespace tokens & only return non-whitespace ones
 	 * @return null|array|string - Either the next token or null if there isn't one
 	 */
 	protected function next($ignoreWhitespace = true) {
 		do {
 			if($this->pos >= $this->length) return null;
 			$next = $this->tokens[$this->pos++];
 		}
 		while($ignoreWhitespace && is_array($next) && $next[0] == T_WHITESPACE);
 		return $next;
 	}
 	/**
 	 * Parse the given file to find the static variables declared in it, along with their access & values
 	 */
 	public function parse() {
 		$depth = 0; $namespace = null; $class = null; $clsdepth = null; $access = 0;
 		while($token = $this->next()) {
 			$type = is_array($token) ? $token[0] : $token;
 			if($type == T_CLASS) {
 				$next = $this->next();
 				if($next[0] != T_STRING) {
 					user_error("Couldn't parse {$this->path} when building config static manifest", E_USER_ERROR);
 				}
 				$class = $next[1];
 			}
 			else if($type == T_NAMESPACE) {
 				$next = $this->next();
 				if($next[0] != T_STRING) {
 					user_error("Couldn't parse {$this->path} when building config static manifest", E_USER_ERROR);
 				}
 				$namespace = $next[1];
 			}
 			else if($type == '{' || $type == T_CURLY_OPEN || $type == T_DOLLAR_OPEN_CURLY_BRACES){
 				$depth += 1;
 				// The first brace after a class keyword marks the depth of the class body
 				if($class && !$clsdepth) $clsdepth = $depth;
 			}
 			else if($type == '}') {
 				$depth -= 1;
 				if($depth < $clsdepth) $class = $clsdepth = null;
 				if($depth < 0) user_error("Hmm - depth calc wrong, hit negatives", E_USER_ERROR);
 			}
 			else if($type == T_PUBLIC || $type == T_PRIVATE || $type == T_PROTECTED) {
 				$access = $type;
 			}
 			else if($type == T_STATIC) {
 				// Only statics declared directly in the class body count, not those inside methods
 				if($class && $depth == $clsdepth) $this->parseStatic($access, $namespace ? $namespace.'\\'.$class : $class);
 			}
 			else {
 				// Any other token ends the run of modifiers, so forget the access level
 				$access = '';
 			}
 		}
 	}
 	/**
 	 * During parsing we've found a "static" keyword. Parse out the variable names and value
 	 * assignments that follow.
 	 *
 	 * Separated out from parse partially so that we can recurse if there are multiple statics
 	 * being declared in a comma separated list
 	 *
 	 * @param int|string $access Access level token type seen so far (may be updated if the
 	 *                           access keyword follows "static")
 	 * @param string $class Class name the static belongs to, namespace-qualified if parse() saw a namespace
 	 */
 	public function parseStatic($access, $class) {
 		$variable = null;
 		$value = '';
 		while($token = $this->next()) {
 			$type = is_array($token) ? $token[0] : $token;
 			if($type == T_PUBLIC || $type == T_PRIVATE || $type == T_PROTECTED) {
 				$access = $type;
 			}
 			else if($type == T_FUNCTION) {
 				// A static method, not a static variable
 				return;
 			}
 			else if($type == T_VARIABLE) {
 				$variable = substr($token[1], 1); // Cut off initial "$"
 			}
 			else if($type == ';' || $type == ',' || $type == '=') {
 				break;
 			}
 			else if($type == T_COMMENT || $type == T_DOC_COMMENT) {
 				// NOP
 			}
 			else {
 				user_error('Unexpected token when building static manifest: '.print_r($token, true), E_USER_ERROR);
 			}
 		}
 		if($token === '=') {
 			$depth = 0;
 			while($token = $this->next(false)){
 				$type = is_array($token) ? $token[0] : $token;
 				// Track bracket nesting depth. Counting "(" and "[" (rather than only the array
 				// keyword) also covers short array syntax and parenthesised expressions, so a
 				// comma inside them is not mistaken for the end of the declaration
 				if($type == '(' || $type == '[') {
 					$depth += 1;
 				}
 				else if($type == ')' || $type == ']') {
 					$depth -= 1;
 				}
 				// Parse out the assignment side of a static declaration, ending on either a ';' or a ',' outside an array
 				if($type == T_WHITESPACE) {
 					$value .= ' ';
 				}
 				else if($type == ';' || ($type == ',' && !$depth)) {
 					break;
 				}
 				// Statics can reference class constants with self:: (and that won't work in eval)
 				else if($type == T_STRING && $token[1] == 'self') {
 					$value .= $class;
 				}
 				else {
 					$value .= is_array($token) ? $token[1] : $token;
 				}
 			}
 		}
 		if (!isset($this->info[$class])) {
 			$this->info[$class] = array(
 				'path' => $this->path,
 				'mtime' => filemtime($this->path),
 			);
 		}
 		if(!isset($this->statics[$class])) {
 			$this->statics[$class] = array();
 		}
 		$value = trim($value);
 		// Compare against the empty string: a plain truthiness check would treat the literal "0"
 		// as "no value" and record null instead of 0
 		if ($value !== '') {
 			// NOTE(review): this evaluates source text taken from the parsed file. That is only
 			// acceptable because the manifest is built from the project's own code - never point
 			// this parser at untrusted files
 			$value = eval('static $temp = '.$value.";\n".'return $temp'.";\n");
 		}
 		else {
 			$value = null;
 		}
 		$this->statics[$class][$variable] = array(
 			'access' => $access,
 			'value' => $value
 		);
 		// A comma means another static follows in the same declaration
 		if($token === ',') $this->parseStatic($access, $class);
 	}
 }
--- a/core/manifest/ManifestCache.php
+++ b/core/manifest/ManifestCache.php
@ -0,0 +1,79 @@
 <?php
 /**
 * A basic caching interface that manifests use to store data.
 *
 * Implementations are instantiated by class name with a single cache name argument
 * (see the SS_MANIFESTCACHE lookups in the manifest constructors).
 */
 interface ManifestCache {
 	/**
 	 * @param string $name Name identifying this cache, used to keep different manifests apart
 	 */
 	public function __construct($name);
 	/**
 	 * @param string $key
 	 * @return mixed The data previously saved under $key. The value returned on a miss depends
 	 *               on the implementation; callers in the manifests only test it for truthiness
 	 */
 	public function load($key);
 	/**
 	 * @param mixed $data Data to store
 	 * @param string $key Key to store it under
 	 */
 	public function save($data, $key);
 	/**
 	 * Remove the data held by this cache.
 	 */
 	public function clear();
 }
 /**
 * Stores manifest data in files in TEMP_FOLDER dir on filesystem
 *
 * Each key is stored serialized in its own "cache_<key>" file inside a per-cache sub-folder.
 */
 class ManifestCache_File implements ManifestCache {
 	/**
 	 * @var string Folder holding this cache's files. Declared explicitly rather than created
 	 *             dynamically; kept public because the dynamic property it replaces was public.
 	 */
 	public $folder;
 	/**
 	 * @param string $name Sub-folder name under TEMP_FOLDER; created if it does not exist
 	 */
 	public function __construct($name) {
 		$this->folder = TEMP_FOLDER.'/'.$name;
 		if (!is_dir($this->folder)) mkdir($this->folder);
 	}
 	/**
 	 * @param string $key
 	 * @return mixed The unserialized data, or null if there is no cache file for $key
 	 */
 	public function load($key) {
 		$file = $this->folder.'/cache_'.$key;
 		return file_exists($file) ? unserialize(file_get_contents($file)) : null;
 	}
 	/**
 	 * @param mixed $data Data to serialize and store
 	 * @param string $key
 	 */
 	public function save($data, $key) {
 		$file = $this->folder.'/cache_'.$key;
 		file_put_contents($file, serialize($data));
 	}
 	/**
 	 * Delete every cache file in this cache's folder.
 	 */
 	public function clear() {
 		// glob() can return false on error, which must not be handed to array_map()
 		$files = glob($this->folder.'/cache_*');
 		if ($files) array_map('unlink', $files);
 	}
 }
 /**
 * Variant of ManifestCache_File that writes each entry as a PHP script and reads it back with include.
 * This is a bit faster if you have an opcode cache installed, but slower otherwise
 */
 class ManifestCache_File_PHP extends ManifestCache_File {
 	/**
 	 * Include the cache file for $key, which assigns the global $loaded_manifest.
 	 *
 	 * @param string $key
 	 * @return mixed The stored data, or null if there is no cache file for $key
 	 */
 	public function load($key) {
 		global $loaded_manifest;
 		// Reset first so a missing file yields null rather than the previous load's data
 		$loaded_manifest = null;
 		$path = $this->folder . '/cache_' . $key;
 		if (file_exists($path)) {
 			include $path;
 		}
 		return $loaded_manifest;
 	}
 	/**
 	 * Write $data as a PHP script that assigns it to $loaded_manifest.
 	 *
 	 * @param mixed $data
 	 * @param string $key
 	 */
 	public function save($data, $key) {
 		$path = $this->folder . '/cache_' . $key;
 		$script = '<?php $loaded_manifest = ' . var_export($data, true) . ';';
 		file_put_contents($path, $script);
 	}
 }
 /**
 * Stores manifest data in APC.
 * Note: benchmarks seem to indicate this is not particularly faster than _File
 */
 class ManifestCache_APC implements ManifestCache {
 	/** @var string Prefix put in front of every key; set to the cache name */
 	protected $pre;
 	/**
 	 * @param string $name Cache name, used as the key prefix
 	 */
 	public function __construct($name) {
 		$this->pre = $name;
 	}
 	/**
 	 * @param string $key
 	 * @return mixed Whatever apc_fetch() returns for the prefixed key
 	 */
 	public function load($key) {
 		$prefixed = $this->pre . $key;
 		return apc_fetch($prefixed);
 	}
 	/**
 	 * @param mixed $data
 	 * @param string $key
 	 */
 	public function save($data, $key) {
 		$prefixed = $this->pre . $key;
 		apc_store($prefixed, $data);
 	}
 	/**
 	 * Does nothing: stored entries are left in APC.
 	 */
 	public function clear() {
 	}
 }
--- a/core/manifest/TemplateManifest.php
+++ b/core/manifest/TemplateManifest.php
@ -35,13 +35,12 @@ class SS_TemplateManifest {
 		$this->project = $project;
-		$this->cacheKey   = $this->tests ? 'manifest_tests' : 'manifest';
+		$cacheClass = defined('SS_MANIFESTCACHE') ? SS_MANIFESTCACHE : 'ManifestCache_File';
 		$this->forceRegen = $forceRegen;
-		$this->cache = SS_Cache::factory('SS_TemplateManifest', 'Core', array(
+		$this->cache = new $cacheClass('templatemanifest'.($includeTests ? '_tests' : ''));
-			'automatic_serialization' => true,
+		$this->cacheKey = 'manifest';
-			'lifetime' => null
+
-		));
+		$this->forceRegen = $forceRegen;
 	}
 	/**
--- a/dev/SapphireTest.php
+++ b/dev/SapphireTest.php
@ -515,6 +515,17 @@ class SapphireTest extends PHPUnit_Framework_TestCase {
 			$controller->response->removeHeader('Location');
 		}
 	}
 	/**
 	 * Same as the parent assertion, except that a DBField haystack is converted to a string first.
 	 */
 	public static function assertContains($needle, $haystack, $message = '', $ignoreCase = FALSE, $checkForObjectIdentity = TRUE) {
 		$subject = ($haystack instanceof DBField) ? (string)$haystack : $haystack;
 		parent::assertContains($needle, $subject, $message, $ignoreCase, $checkForObjectIdentity);
 	}
 	/**
 	 * Same as the parent assertion, except that a DBField haystack is converted to a string first.
 	 */
 	public static function assertNotContains($needle, $haystack, $message = '', $ignoreCase = FALSE, $checkForObjectIdentity = TRUE) {
 		$subject = ($haystack instanceof DBField) ? (string)$haystack : $haystack;
 		parent::assertNotContains($needle, $subject, $message, $ignoreCase, $checkForObjectIdentity);
 	}
 	/**
 	 * Clear the log of emails sent
 	 */
--- a/dev/TestRunner.php
+++ b/dev/TestRunner.php
@ -88,6 +88,10 @@ class TestRunner extends Controller {
 		SS_TemplateLoader::instance()->pushManifest(new SS_TemplateManifest(
 			BASE_PATH, project(), true, isset($_GET['flush'])
 		));
 		Config::inst()->pushConfigStaticManifest(new SS_ConfigStaticManifest(
 			BASE_PATH, true, isset($_GET['flush'])
 		));
 	}
 	public function init() {
--- a/docs/en/changelogs/3.1.0.md
+++ b/docs/en/changelogs/3.1.0.md
@ -15,6 +15,8 @@
 ### Framework
 * Shortcodes are no longer supported in template files. They continue to work in DB fields and other
   HTMLText-cast fields.
 * `DataList` and `ArrayList` are now immutable, they'll return cloned instances on modification
 * Behaviour testing support through [Behat](http://behat.org), with CMS test coverage 
   (see the [SilverStripe Behat Extension]() for details)
@ -22,6 +24,7 @@
 * Deny URL access if `Controller::$allowed_actions` is undefined
 * Removed support for "*" rules in `Controller::$allowed_actions`
 * Removed support for overriding rules on parent classes through `Controller::$allowed_actions`
 * `RestfulService` verifies SSL peers by default
 * Editing of relation table data (`$many_many_extraFields`) in `GridField`
 * Optional integration with ImageMagick as a new image manipulation backend
 * Support for PHP 5.4's built-in webserver
@ -29,6 +32,55 @@
 ## Upgrading
 ### Static configuration properties are now immutable, you must use Config API.
 A common SilverStripe pattern is to use a static variable on a class to define a configuration parameter.
 The configuration system added in SilverStripe 3.0 builds on this by using this static variable as a way
 of defining the default value.
 In SilverStripe 3.0, it was possible to edit this value at run-time and have the change propagate into the
 configuration system.  This is no longer the case, for performance reasons.
 Many of the configuration variables have been changed to "private" so that attempts to change them throw an
 error, but if you do have a configuration static that is able to be changed, and you change it, then the
 configuration system will silently ignore it.
 Please change all run-time manipulation of configuration to use `Config::inst()->update()` or
 `$this->config()->update()`.  For more information about how to use the config system, see the 
 ["Configuration" topic](/topic/configuration).
 ### default_cast is now Text
 In order to reduce the chance of accidentally allowing XSS attacks, the value of default_cast
 has been changed in 3.1 from HTMLText to Text. This means that any values used in a template
 that haven't been explicitly cast as safe will be escaped (`<` replaced with `&lt;` etc).
 When upgrading, if methods return HTML fragments they need to explicitly cast them
 as such. This can either be done by returning an HTMLText object, like:
 	:::php
 	return DBField::create_field('HTMLText', '<div></div>');
 or by defining the casting of the accessor method, like:
 	:::php
 	class Page extends SiteTree {
 		static $casting = array(
 			'MyDiv' => 'HTMLText'
 		);
 		function MyDiv() {
 			return '<div></div>';
 		}
 	}
 SSViewer#process (and as a result ViewableData#renderWith) have been changed to already return
 explicitly cast HTMLText instances, so functions that return the result of these methods won't
 have to do any additional casting.
 Note that this change means that if code was testing the result via is_string, that is no longer
 reliable.
 ### Deny URL access if `Controller::$allowed_actions` is undefined or empty array
 In order to make controller access checks more consistent and easier to
@ -174,6 +226,17 @@ in order to reduce the boilerplate code required to get a model editable in the
 Note: GridField is already relying on the permission checks performed
 through the CMS controllers, providing a simple level of security.
 ### RestfulService verifies SSL peers by default
 This makes the implementation "secure by default", by removing
 the call to `curl_setopt(CURLOPT_SSL_VERIFYPEER, false)`.
 Failing to validate SSL peers makes HTTP requests vulnerable to man in the middle attacks.
 The underlying `curl` library relies on the operating system for the resulting CA certificate
 verification. On some systems (mainly Windows), these certificates are not available on
 a standard PHP installation, and need to be added manually through `CURLOPT_CAINFO`.
 Although it is not recommended, you can restore the old insecure behaviour with
 the following configuration: `RestfulService::set_default_curl_option(CURLOPT_SSL_VERIFYPEER, false)`.
 ### Other
 * `TableListField`, `ComplexTableField`, `TableField`, `HasOneComplexTableField`, `HasManyComplexTableField` and `ManyManyComplexTableField` have been removed from the core and placed into a module called "legacytablefields" located at https://github.com/silverstripe-labs/legacytablefields
@ -221,4 +284,4 @@ through the CMS controllers, providing a simple level of security.
 * `DataList#dataQuery` has been changed to return a clone of the query, and so can't be used to modify the list's query directly. Use `DataList#alterDataQuery` instead to modify dataQuery in a safe manner.
 * `ScheduledTask`, `QuarterHourlyTask`, `HourlyTask`, `DailyTask`, `MonthlyTask`, `WeeklyTask` and
 	 `YearlyTask` are deprecated, please extend from `BuildTask` or `CliController`,
-	 and invoke them in self-defined frequencies through Unix cronjobs etc.
+	 and invoke them in self-defined frequencies through Unix cronjobs etc.
--- a/docs/en/misc/contributing/documentation.md
+++ b/docs/en/misc/contributing/documentation.md
@ -12,9 +12,9 @@ and a GitHub user account.
 ## Editing online
-The easiest way of making a change the the documentation is to find the appropriate .md 
+The easiest way of making a change to the documentation is to find the appropriate .md 
-file in the [github.com/silverstripe/sapphire](https://github.com/silverstripe/sapphire/edit/3.0/docs/) repository
+file in the [github.com/silverstripe/sapphire](https://github.com/silverstripe/sapphire/tree/3.0/docs/) repository
-and press the "edit" button.  You will need a GitHub account to do this.
+and press the "edit" button.  You will need a GitHub account to do this.  You should make the changes in the lowest branch they apply to.
 * After you have made your change, describe it in the "commit summary" and "extended description" fields below, and press "Commit Changes".
 * After that you will see a form to submit a Pull Request.  You should just be able to submit the form, and your changes will be sent to the core team for approval.
--- a/docs/en/reference/execution-pipeline.md
+++ b/docs/en/reference/execution-pipeline.md
@ -60,12 +60,12 @@ This will add an object with ID 12 to the cart.
 When you create a function, you can access the ID like this:
 	:::php
-	 public function addToCart ($request) {
+	public function addToCart ($request) {
-	  $param = $r->allParams();
+		$param = $request->allParams();
-	  echo "my ID = ".$param["ID"];
+		echo "my ID = " . $param["ID"];
-	  $obj = MyProduct::get()->byID($param["ID"]);
+		$obj = MyProduct::get()->byID($param["ID"]);
-	  $obj->addNow();
+		$obj->addNow();
-	 }
+	}
 ## Controllers and actions
@ -79,10 +79,13 @@ You can access the following controller-method with /team/signup
 	class Team extends DataObject {}
 	class Team_Controller extends Controller {
 		static $allowed_actions = array('signup');
-	  public function signup($id, $otherId) {
+		
-	    return $this->renderWith('MyTemplate');
+		public function signup($id, $otherId) {
-	  }
+			return $this->renderWith('MyTemplate');
 		}
 	}
 ## SSViewer template rendering
--- a/docs/en/topics/i18n.md
+++ b/docs/en/topics/i18n.md
@ -299,13 +299,14 @@ Example Translation Table (mymodule/javascript/lang/de_DE.js)
 	alert(ss.i18n._t('MYMODULE.MYENTITY'));
-### Advanced Usage with injection
+### Advanced Usage with sprintf()
 	:::js
-	// MYMODULE.MYENTITY contains "Really delete {answer} articles by {author} authors?"
+	// MYMODULE.MYENTITY contains "Really delete %s articles by %s authors?"
-	alert(ss.i18n._t('MYMODULE.MYENTITY'),
+	alert(ss.i18n.sprintf(
-		array('answer' => 42, 'author' => 'Douglas Adams')
+		ss.i18n._t('MYMODULE.MYENTITY'),
-	));
+		42,
 		'Douglas Adams'
 	// Displays: "Really delete 42 articles by Douglas Adams?"
--- a/docs/en/topics/module-development.md
+++ b/docs/en/topics/module-development.md
@ -3,7 +3,7 @@
 ## Introduction
 Creating a module is a good way to re-use abstract code and templates across multiple projects. SilverStripe already has
-certain modules included, for example "framework" and "cms". These three modules are the core functionality and
+certain modules included, for example "framework" and "cms". These two modules are the core functionality and
 templating for any initial installation. If you're wanting to add generic functionality that isn't specific to your
 project, like a forum, an ecommerce package or a blog you can do it like this;
--- a/docs/en/topics/rich-text-editing.md
+++ b/docs/en/topics/rich-text-editing.md
@ -189,7 +189,7 @@ Example: Remove field for "image captions"
 	:::php
 	// File: mysite/_config.php
-	HtmlEditorField::add_extension('MyToolbarExtension');
+	HtmlEditorField_Toolbar::add_extension('MyToolbarExtension');
 Adding functionality is a bit more advanced, you'll most likely
 need to add some fields to the PHP forms, as well as write some
--- a/docs/en/tutorials/2-extending-a-basic-site.md
+++ b/docs/en/tutorials/2-extending-a-basic-site.md
@ -201,9 +201,9 @@ the date field will have the date format defined by your locale.
 		$fields = parent::getCMSFields();
 		$fields->addFieldToTab('Root.Main', $dateField = new DateField('Date','Article Date (for example: 20/12/2010)'), 'Content');
-        $dateField->setConfig('showcalendar', true);
+		$dateField->setConfig('showcalendar', true);
-        $fields->addFieldToTab('Root.Main', $dateField, 'Content');
+		$fields->addFieldToTab('Root.Main', $dateField, 'Content');
-        $fields->addFieldToTab('Root.Main', new TextField('Author'), 'Content');
+		$fields->addFieldToTab('Root.Main', new TextField('Author'), 'Content');
 		return $fields;
 	}
--- a/email/Mailer.php
+++ b/email/Mailer.php
@ -7,7 +7,7 @@
 * @package framework
 * @subpackage email
 */
-class Mailer {
+class Mailer extends Object {
 	/**
 	 * Send a plain-text email.
--- a/filesystem/File.php
+++ b/filesystem/File.php
@ -411,7 +411,7 @@ class File extends DataObject {
 	 * @return String
 	 */
 	public function appCategory() {
-		return self::get_app_category($this->Extension);
+		return self::get_app_category($this->getExtension());
 	}
 	public function CMSThumbnail() {
@ -426,7 +426,7 @@ class File extends DataObject {
 	 * @return String 
 	 */
 	public function Icon() {
-		$ext = $this->Extension;
+		$ext = strtolower($this->getExtension());
 		if(!Director::fileExists(FRAMEWORK_DIR . "/images/app_icons/{$ext}_32.gif")) {
 			$ext = $this->appCategory();
 		}
--- a/filesystem/GD.php
+++ b/filesystem/GD.php
@ -264,6 +264,11 @@ class GDBackend extends Object implements Image_Backend {
 	public function crop($top, $left, $width, $height) {
 		$newGD = imagecreatetruecolor($width, $height);
 		// Preserve alpha channel between images
 		imagealphablending($newGD, false);
 		imagesavealpha($newGD, true);
 		imagecopyresampled($newGD, $this->gd, 0, 0, $left, $top, $width, $height, $width, $height);
 		$output = clone $this;
--- a/forms/FieldGroup.php
+++ b/forms/FieldGroup.php
@ -110,10 +110,10 @@ class FieldGroup extends CompositeField {
 		$fs = $this->FieldList();
 		foreach($fs as $subfield) {
-			if($m = $subfield->Message()) $message[] = $m;
+			if($m = $subfield->Message()) $message[] = rtrim($m, ".");
 		}
-		return (isset($message)) ? implode(",  ", $message) . ". " : "";
+		return (isset($message)) ? implode(",  ", $message) . "." : "";
 	}	
 	/**
--- a/forms/HtmlEditorField.php
+++ b/forms/HtmlEditorField.php
@ -56,8 +56,8 @@ class HtmlEditorField extends TextareaField {
 	 */
 	public function Field($properties = array()) {
 		// mark up broken links
-		$value  = new SS_HTMLValue($this->value);
+		$value = Injector::inst()->create('HTMLValue', $this->value);
-		
+
 		if($links = $value->getElementsByTagName('a')) foreach($links as $link) {
 			$matches = array();
@ -103,7 +103,7 @@ class HtmlEditorField extends TextareaField {
 		$linkedPages = array();
 		$linkedFiles = array();
-		$htmlValue = new SS_HTMLValue($this->value);
+		$htmlValue = Injector::inst()->create('HTMLValue', $this->value);
 		if(class_exists('SiteTree')) {
 			// Populate link tracking for internal links & links to asset files.
--- a/forms/gridfield/GridField.php
+++ b/forms/gridfield/GridField.php
@ -326,7 +326,7 @@ class GridField extends FormField {
 			}
 		}
-		$total = $list->count();
+		$total = count($list);
 		if($total > 0) {
 			$rows = array();
 			foreach($list as $idx => $record) {
--- a/javascript/HtmlEditorField.js
+++ b/javascript/HtmlEditorField.js
@ -900,7 +900,8 @@ ss.editorWrappers['default'] = ss.editorWrappers.tinyMCE;
 					//get the uploaded file ID when this event triggers, signaling the upload has completed successfully
 					editFieldIDs.push($(this).data('id'));
 				});
-				var uploadedFiles = form.find('.ss-uploadfield-files').children('.ss-uploadfield-item');
+				// we only want this .ss-uploadfield-files - otherwise we get all .ss-uploadfield-files, which includes the ones not related to #tinymce insertmedia
 				var uploadedFiles = $('.ss-uploadfield-files', this).children('.ss-uploadfield-item');
 				uploadedFiles.each(function(){
 					var uploadedID = $(this).data('fileid');
 					if ($.inArray(uploadedID, editFieldIDs) == -1) {
--- a/model/DataObject.php
+++ b/model/DataObject.php
@ -1816,10 +1816,6 @@ class DataObject extends ViewableData implements DataObjectInterface, i18nEntity
 					foreach($otherManyMany as $inverseComponentName => $candidateClass) {
 						if($candidateClass == $class || is_subclass_of($class, $candidateClass)) {
 							$parentField = ($class == $candidate) ? "ChildID" : $candidateClass . "ID";
 							// HACK HACK HACK!
 							if($component == 'NestedProducts') {
 								$parentField = $candidateClass . "ID";
 							}
 							return array($class, $candidate, $parentField, $childField,
 								"{$candidate}_$inverseComponentName");
--- a/model/HTMLValue.php
+++ b/model/HTMLValue.php
@ -1,69 +1,156 @@
 <?php
 /**
- * This class acts as a wrapper around the built in DOMDocument class in order to use it to manage a HTML snippet,
+ * This class handles the converting of HTML fragments between a string and a DOMDocument based
- * rather than a whole document, while still exposing the DOMDocument API.
+ * representation.
 *
 * It's designed to allow dependancy injection to replace the standard HTML4 version with one that
 * handles XHTML or HTML5 instead
 *
 * @package framework
 * @subpackage integration
 */
-class SS_HTMLValue extends ViewableData {
+abstract class SS_HTMLValue extends ViewableData {
 	/**
 	 * @var DOMDocument
 	 */
 	protected $document;
 	/**
 	 * @param string $content
 	 */
 	public function __construct($content = null) {
 		$this->setDocument(new DOMDocument('1.0', 'UTF-8'));
 		$this->setScrictErrorChecking(false);
 		$this->setOutputFormatting(false);
 		$this->setContent($content);
 	public function __construct($fragment = null) {
 		if ($fragment) $this->setContent($fragment);
 		parent::__construct();
 	}
-	/**
+	abstract public function setContent($fragment);
 	 * Should strict error checking be used?
 	 * @param boolean $bool
 	 */
 	public function setScrictErrorChecking($bool) {
 		$this->getDocument()->scrictErrorChecking = $bool;
 	}
 	/**
 	 * Should the output be formatted?
 	 * @param boolean $bool
 	 */
 	public function setOutputFormatting($bool) {
 		$this->getDocument()->formatOutput = $bool;
 	}
 	/**
 	 * @param string $content
 	 * @return string
 	 */
 	public function getContent() {
-		// strip any surrounding tags before the <body> and after the </body> which are automatically added by
+		$doc = clone $this->getDocument();
-		// DOMDocument.  Note that we can't use the argument to saveHTML() as it's only supported in PHP 5.3.6+,
+		$xp = new DOMXPath($doc);
 		// we support 5.3.2 as a minimum in addition to the above, trim any surrounding newlines from the output
-		// shortcodes use square brackets which get escaped into HTML entities by saveHTML()
+		// If there's no body, the content is empty string
-		// this manually replaces them back to square brackets so that the shortcodes still work correctly
+		if (!$doc->getElementsByTagName('body')->length) return '';
-		// we can't use urldecode() here, as valid characters like "+" will be incorrectly replaced with spaces
+
-		return trim(
+		// saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
-			preg_replace(
+		// shortcodes. So first, save all the attribute values for later restoration.
-				array(
+		$attrs = array(); $i = 0;
-					'/(.*)<body>/is',
+
-					'/<\/body>(.*)/is',
+		foreach ($xp->query('//body//@*') as $attr) {
-				),
+			$key = "__HTMLVALUE_".($i++);
-				'',
+			$attrs[$key] = $attr->value;
-				str_replace(array('%5B', '%5D'), array('[', ']'), $this->getDocument()->saveHTML())
+			$attr->value = $key;
-			)
+		}
 		// Then, call saveHTML & extract out the content from the body tag
 		$res = preg_replace(
 			array(
 				'/^(.*?)<body>/is',
 				'/<\/body>(.*?)$/isD',
 			),
 			'',
 			$doc->saveHTML()
 		);
 		// Then replace the saved attributes with their original versions
 		$res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
 			return $attrs[$matches[0]];
 		}, $res);
 		return $res;
 	}
 	/** @see HTMLValue::getContent() */
 	public function forTemplate() {
 		return $this->getContent();
 	}
 	/** @var DOMDocument */
 	private $document = null;
 	/** @var bool */
 	private $valid = true;
 	/**
 	 * Get the DOMDocument for the passed content
 	 * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
 	 */
 	public function getDocument() {
 		// An instance flagged as invalid never exposes a document
 		if (!$this->valid) return false;
 		// First access on a valid instance: lazily build an empty DOMDocument with
 		// strict error checking and output formatting both switched off
 		if (!$this->document) {
 			$fresh = new DOMDocument('1.0', 'UTF-8');
 			$fresh->strictErrorChecking = false;
 			$fresh->formatOutput = false;
 			$this->document = $fresh;
 		}
 		return $this->document;
 	}
 	/**
 	 * Is this HTMLValue in an errored state?
 	 * @return bool
 	 */
 	public function isValid() {
 		return $this->valid;
 	}
 	/**
 	 * @param DOMDocument $document
 	 */
 	public function setDocument($document) {
 		$this->document = $document;
 		$this->valid = true;
 	}
 	/**
 	 * Put this value into its errored state: the valid flag is cleared and any
 	 * document currently held is discarded.
 	 */
 	public function setInvalid() {
 		$this->valid = false;
 		$this->document = false;
 	}
 	/**
 	 * Pass through any missed method calls to DOMDocument (if they exist)
 	 * so that HTMLValue can be treated mostly like an instance of DOMDocument
 	 */
 	public function __call($method, $arguments) {
 		$target = $this->getDocument();
 		// Anything the document does not implement is left to the parent's magic-call handling
 		if(!method_exists($target, $method)) return parent::__call($method, $arguments);
 		return call_user_func_array(array($target, $method), $arguments);
 	}
 	/**
 	 * Get the body element, or false if there isn't one (we haven't loaded any content
 	 * or this instance is in an invalid state)
 	 */
 	public function getBody() {
 		$document = $this->getDocument();
 		if ($document) {
 			$bodies = $document->getElementsByTagName('body');
 			// Only the first body element is ever handed back
 			if ($bodies->length) return $bodies->item(0);
 		}
 		// Either there is no document (invalid state) or it contains no body element
 		return false;
 	}
 	/**
 	 * Make an xpath query against this HTML
 	 *
 	 * @param $query string - The xpath query string
 	 * @return DOMNodeList
 	 */
 	public function query($query) {
 		// A new DOMXPath is built on every call, from whatever getDocument() currently returns.
 		// NOTE(review): getDocument() returns false while this value is invalid and nothing here
 		// guards against that - presumably callers check isValid() first; confirm.
 		$xp = new DOMXPath($this->getDocument());
 		return $xp->query($query);
 	}
 }
 class SS_HTML4Value extends SS_HTMLValue {
 	/**
 	 * @param string $content
 	 * @return bool
@ -73,41 +160,12 @@ class SS_HTMLValue extends ViewableData {
 		// This behaviour is apparently XML spec, but we don't want this because it messes up the HTML
 		$content = str_replace(chr(13), '', $content);
 		// Reset the document if we're in an invalid state for some reason
 		if (!$this->isValid()) $this->setDocument(null);
 		return @$this->getDocument()->loadHTML(
 			'<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>' .
 			"<body>$content</body></html>"
 		);
 	}
 	/**
 	 * @return DOMDocument
 	 */
 	public function getDocument() {
 		return $this->document;
 	}
 	/**
 	 * @param DOMDocument $document
 	 */
 	public function setDocument($document) {
 		$this->document = $document;
 	}
 	/**
 	 * A simple convenience wrapper around DOMDocument::getElementsByTagName().
 	 *
 	 * @param string $name
 	 * @return DOMNodeList
 	 */
 	public function getElementsByTagName($name) {
 		return $this->getDocument()->getElementsByTagName($name);
 	}
 	/**
 	 * @see HTMLValue::getContent()
 	 */
 	public function forTemplate() {
 		return $this->getContent();
 	}
 }
--- a/model/ManyManyList.php
+++ b/model/ManyManyList.php
@ -168,11 +168,27 @@ class ManyManyList extends RelationList {
 	 * Remove all items from this many-many join.  To remove a subset of items, filter it first.
 	 */
 	public function removeAll() {
-		$query = $this->dataQuery()->query();
+		$base = ClassInfo::baseDataClass($this->dataClass());
-		$query->setDelete(true);
+
-		$query->setSelect(array('*'));
+		// Remove the join to the join table to avoid MySQL row locking issues.
-		$query->setFrom("\"$this->joinTable\"");
+		$query = $this->dataQuery();
-		$query->execute();
+		$query->removeFilterOn($query->getQueryParam('Foreign.Filter'));
 		$query = $query->query();
 		$query->setSelect("\"$base\".\"ID\"");
 		$from = $query->getFrom();
 		unset($from[$this->joinTable]);
 		$query->setFrom($from);
 		// Use a sub-query as SQLite does not support setting delete targets in
 		// joined queries.
 		$delete = new SQLQuery();
 		$delete->setDelete(true);
 		$delete->setFrom("\"$this->joinTable\"");
 		$delete->addWhere($this->foreignIDFilter());
 		$delete->addWhere("\"$this->joinTable\".\"$this->localKey\" IN ({$query->sql()})");
 		$delete->execute();
 	}
 	/**
--- a/model/UnsavedRelationList.php
+++ b/model/UnsavedRelationList.php
@ -199,6 +199,26 @@ class UnsavedRelationList extends ArrayList {
 		$this->addMany($idList);
 	}
 	/**
 	 * Returns an array with both the keys and values set to the IDs of the records in this list.
 	 *
 	 * Does not return the IDs for unsaved DataObjects
 	 */
 	public function getIDList() {
 		// Each item is either a raw numeric ID or an object; for objects, read the ID property
 		$collected = array();
 		foreach($this->items as $position => $entry) {
 			$collected[$position] = is_numeric($entry) ? $entry : $entry->ID;
 		}
 		// Drop repeated IDs first, then anything that evaluates to false
 		$collected = array_unique($collected);
 		$collected = array_filter($collected);
 		// Nothing left: hand the empty array back as-is rather than passing it to array_combine()
 		if (!$collected) return $collected;
 		// Re-key so that (1, 2, 3) becomes (1 => 1, 2 => 2, 3 => 3)
 		return array_combine($collected, $collected);
 	}
 	/**
 	 * Returns the first item in the list
 	 *
@ -294,10 +314,6 @@ class UnsavedRelationList extends ArrayList {
 		throw new LogicException(__FUNCTION__ . " can't be called on an UnsavedRelationList.");
 	}
 	public function getIDList() {
 		throw new LogicException(__FUNCTION__ . " can't be called on an UnsavedRelationList.");
 	}
 	public function getRange($offset, $length) {
 		throw new LogicException(__FUNCTION__ . " can't be called on an UnsavedRelationList.");
 	}
--- a/model/fieldtypes/DBField.php
+++ b/model/fieldtypes/DBField.php
@ -295,5 +295,8 @@ abstract class DBField extends ViewableData {
 </ul>
 DBG;
 	}
-	
+
 	public function __toString() {
 		return $this->forTemplate();
 	}
 }
--- a/model/fieldtypes/HTMLText.php
+++ b/model/fieldtypes/HTMLText.php
@ -11,7 +11,6 @@
 * @subpackage model
 */
 class HTMLText extends Text {
 	public static $escape_type = 'xml';
 	static $casting = array(
@ -33,6 +32,16 @@ class HTMLText extends Text {
 		'NoHTML' => 'Text',
 	);
 	protected $processShortcodes = true;
 	/**
 	 * Apply field options. A "shortcodes" entry switches shortcode processing
 	 * in forTemplate() on or off.
 	 *
 	 * @param array $options
 	 */
 	public function setOptions(array $options = array()) {
 		parent::setOptions($options);
 		// Without a "shortcodes" key the current setting is left alone
 		if(!array_key_exists("shortcodes", $options)) return;
 		$this->processShortcodes = (bool)$options["shortcodes"];
 	}
 	/**
 	 * Create a summary of the content. This will be some section of the first paragraph, limited by
 	 * $maxWords. All internal tags are stripped out - the return value is a string
@ -133,7 +142,12 @@ class HTMLText extends Text {
 	}	
 	public function forTemplate() {
-		return ShortcodeParser::get_active()->parse($this->value);
+		if ($this->processShortcodes) {
 			return ShortcodeParser::get_active()->parse($this->value);
 		}
 		else {
 			return $this->value;
 		}
 	}
 	/**
--- a/model/fieldtypes/HTMLVarchar.php
+++ b/model/fieldtypes/HTMLVarchar.php
@ -9,11 +9,26 @@
 class HTMLVarchar extends Varchar {
 	public static $escape_type = 'xml';
-	
+
-	public function forTemplate() {
+	protected $processShortcodes = true;
-		return ShortcodeParser::get_active()->parse($this->value);
+
 	/**
 	 * Apply field options. A "shortcodes" entry switches shortcode processing
 	 * in forTemplate() on or off.
 	 *
 	 * @param array $options
 	 */
 	public function setOptions(array $options = array()) {
 		parent::setOptions($options);
 		// Without a "shortcodes" key the current setting is left alone
 		if(!array_key_exists("shortcodes", $options)) return;
 		$this->processShortcodes = (bool)$options["shortcodes"];
 	}
-	
+
 	public function forTemplate() {
 		// With shortcode processing switched off the stored value is returned untouched
 		if (!$this->processShortcodes) return $this->value;
 		return ShortcodeParser::get_active()->parse($this->value);
 	}
 	/**
 	 * On top of the parent's existence check, a value that is exactly an empty
 	 * paragraph ('<p></p>') is treated as not existing.
 	 *
 	 * @return bool
 	 */
 	public function exists() {
 		return parent::exists() && $this->value != '<p></p>';
 	}
--- a/parsers/ShortcodeParser.php
+++ b/parsers/ShortcodeParser.php
@ -321,9 +321,8 @@ class ShortcodeParser {
 	 *
 	 * @param DOMDocument $doc
 	 */
-	protected function replaceAttributeTagsWithContent($doc) {
+	protected function replaceAttributeTagsWithContent($htmlvalue) {
-		$xp = new DOMXPath($doc);
+		$attributes = $htmlvalue->query('//@*[contains(.,"[")][contains(.,"]")]');
 		$attributes = $xp->query('//@*[contains(.,"[")][contains(.,"]")]');
 		$parser = $this;
 		for($i = 0; $i < $attributes->length; $i++) {
@ -462,7 +461,7 @@ class ShortcodeParser {
 	}
 	/**
-	 * Given a node with represents a shortcode marker and some informationabout the shortcode, call the 
+	 * Given a node with represents a shortcode marker and some information about the shortcode, call the
 	 * shortcode handler & replace the marker with the actual content
 	 * 
 	 * @param DOMElement $node
@ -488,57 +487,14 @@ class ShortcodeParser {
 		}
 		if ($content) {
-			$parsed = HTML5_Parser::parseFragment($content, 'div');
+			$parsed = Injector::inst()->create('HTMLValue', $content);
-			$this->insertListAfter($parsed, $node);
+			$body = $parsed->getBody();
 			if ($body) $this->insertListAfter($body->childNodes, $node);
 		}
 		$this->removeNode($node);
 	}
 	protected function loadHTML($html) {
 		require_once(THIRDPARTY_PATH.'/html5lib/HTML5/Parser.php');
 		// Convert any errors to exceptions
 		set_error_handler(
 			function($no, $str){
 				throw new Exception("HTML Parse Error: ".$str);
 			},
 			error_reporting()
 		);
 		// Use HTML5lib to parse the HTML fragment
 		try {
 			$bases = HTML5_Parser::parseFragment(trim($html), 'div');
 		}
 		catch (Exception $e) {
 			$bases = null;
 		}
 		// Disable our error handler (restoring to previous value)
 		restore_error_handler();
 		return $bases;
 	}
 	protected function saveHTML($doc) {
 		if (version_compare(PHP_VERSION, '5.3.6', '>=')){
 			$res = '';
 			foreach($doc->firstChild->childNodes as $child) $res .= $doc->saveHTML($child);
 		}
 		else {
 			$res = preg_replace(
 				array(
 					'/^(.*?)<html>/is',
 					'/<\/html>(.*?)$/is',
 				),
 				'',
 				$doc->saveHTML()
 			);
 		}
 		return $res;
 	}
 	/**
 	 * Parse a string, and replace any registered shortcodes within it with the result of the mapped callback.
 	 *
@ -556,11 +512,10 @@ class ShortcodeParser {
 		// use a proper DOM
 		list($content, $tags) = $this->replaceElementTagsWithMarkers($content);
 		$htmlvalue = Injector::inst()->create('HTMLValue', $content);
 		// Now parse the result into a DOM
-		$bases = $this->loadHTML($content);
+		if (!$htmlvalue->isValid()){
 		// If we couldn't parse the HTML, error out
 		if (!$bases || !$bases->length) {
 			if(self::$error_behavior == self::ERROR) {
 				// The level must be the real E_USER_ERROR constant; the previous spelling (three Rs)
 				// names a constant that does not exist, so the intended error level was never passed
 				user_error('Couldn\'t decode HTML when processing short codes', E_USER_ERROR);
 			}
@ -569,15 +524,11 @@ class ShortcodeParser {
 			}
 		}
 		$doc = $bases->item(0)->ownerDocument;
 		$xp = new DOMXPath($doc);
 		// First, replace any shortcodes that are in attributes
-		$this->replaceAttributeTagsWithContent($doc);
+		$this->replaceAttributeTagsWithContent($htmlvalue);
 		// Find all the element scoped shortcode markers
-		$shortcodes = $xp->query('//img[@class="'.self::$marker_class.'"]');
+		$shortcodes = $htmlvalue->query('//img[@class="'.self::$marker_class.'"]');
 		// Find the parents. Do this before DOM modification, since SPLIT might cause parents to move otherwise
 		$parents = $this->findParentsForMarkers($shortcodes);
@ -605,8 +556,8 @@ class ShortcodeParser {
 			$this->replaceMarkerWithContent($shortcode, $tag);
 		}
-		
+
-		return $this->saveHTML($doc);
+		return $htmlvalue->getContent();
 	}
--- a/tests/core/ConfigTest.php
+++ b/tests/core/ConfigTest.php
@ -163,5 +163,55 @@ class ConfigTest extends SapphireTest {
 	public function testFragmentOrder() {
 		$this->markTestIncomplete();
 	}
-	
+
 	/**
 	 * Checks how many entries remain indexed after writing more than
 	 * Config_LRU::SIZE values into the cache.
 	 */
 	public function testLRUDiscarding() {
 		$limit = Config_LRU::SIZE;
 		// Write twice as many distinct keys as the size limit
 		$cache = new ConfigTest_Config_LRU();
 		for ($key = 0; $key < $limit * 2; $key++) {
 			$cache->set($key, $key);
 		}
 		$this->assertEquals(
 			$limit, count($cache->indexing),
 			'Homogenous usage gives exact discarding'
 		);
 		// Write one key per slot, then set the same extra key over and over
 		$cache = new ConfigTest_Config_LRU();
 		for ($key = 0; $key < $limit; $key++) {
 			$cache->set($key, $key);
 		}
 		for ($round = 0; $round < $limit; $round++) {
 			$cache->set(-1, -1);
 		}
 		$this->assertLessThan(
 			$limit, count($cache->indexing),
 			'Heterogenous usage gives sufficient discarding'
 		);
 	}
 	/**
 	 * Exercises clean() without arguments and with a tag, checking after each
 	 * call that the index is empty and the cleaned key can no longer be read.
 	 */
 	public function testLRUCleaning() {
 		// Fill the cache with one entry per slot, then clean without a tag
 		$cache = new ConfigTest_Config_LRU();
 		for ($i = 0; $i < Config_LRU::SIZE; $i++) $cache->set($i, $i);
 		$this->assertEquals(Config_LRU::SIZE, count($cache->indexing));
 		$cache->clean();
 		$this->assertEquals(0, count($cache->indexing), 'Clean clears all items');
 		$this->assertFalse($cache->get(1), 'Clean clears all items');
 		// A single tagged entry, cleaned by that tag
 		$cache->set(1, 1, array('Foo'));
 		$this->assertEquals(1, count($cache->indexing));
 		$cache->clean('Foo');
 		$this->assertEquals(0, count($cache->indexing), 'Clean items with matching tag');
 		$this->assertFalse($cache->get(1), 'Clean items with matching tag');
 		// An entry carrying two tags, cleaned by only one of them
 		$cache->set(1, 1, array('Foo', 'Bar'));
 		$this->assertEquals(1, count($cache->indexing));
 		$cache->clean('Bar');
 		$this->assertEquals(0, count($cache->indexing), 'Clean items with any single matching tag');
 		$this->assertFalse($cache->get(1), 'Clean items with any single matching tag');
 	}
 }
 /**
  * Config_LRU subclass used by ConfigTest. It declares $cache and $indexing as
  * public so the tests can read them directly (presumably they are not public
  * on Config_LRU itself - confirm against that class).
  */
 class ConfigTest_Config_LRU extends Config_LRU {
 	public $cache;
 	public $indexing;
 }
--- a/tests/core/manifest/ConfigStaticManifestTest.php
+++ b/tests/core/manifest/ConfigStaticManifestTest.php
@ -0,0 +1,164 @@
 <?php
 class ConfigStaticManifestTest extends SapphireTest {
 	/* Example statics */
 	// Different access levels
 	static $nolevel;
 	public static $public;
 	protected static $protected;
 	private static $private;
 	static public $public2;
 	static protected $protected2;
 	static private $private2;
 	// Assigning values
 	static $snone;
 	static $snull = null;
 	static $sint = 1;
 	static $sfloat = 2.5;
 	static $sstring = 'string';
 	static $sarray = array(1, 2, array(3, 4), 5);
 	static $sheredoc = <<<DOC
 heredoc
 DOC;
 	static $snowdoc = <<<'DOC'
 nowdoc
 DOC;
 	// Assigning multiple values
 	static $onone, $onull = null, $oint = 1, $ofloat = 2.5, $ostring = 'string', $oarray = array(1, 2, array(3, 4), 5), $oheredoc = <<<DOC
 heredoc
 DOC
 , $onowdoc = <<<'DOC'
 nowdoc
 DOC;
 	static
 		$mnone,
 		$mnull = null,
 		$mint = 1,
 		$mfloat = 2.5,
 		$mstring = 'string',
 		$marray = array(
 			1, 2,
 			array(3, 4),
 			5
 		),
 		$mheredoc = <<<DOC
 heredoc
 DOC
 		,
 		$mnowdoc = <<<'DOC'
 nowdoc
 DOC;
 	static /* Has comment inline */ $commented_int = 1, /* And here */ $commented_string = 'string';
 	static
 		/**
 		 * Has docblock inline
 		 */
 		$docblocked_int = 1,
 		/** And here */
 		$docblocked_string = 'string';
 	// Should ignore static methods
 	static function static_method() {}
 	// Should ignore method statics
 	function instanceMethod() {
 		static $method_static;
 	}
 	/* The tests */
 	/**
 	 * Run SS_ConfigStaticManifest_Parser over this very file and return the parser.
 	 *
 	 * The parser is memoised in a function-level static so the file is parsed only
 	 * once, however many tests call this helper.
 	 *
 	 * @return SS_ConfigStaticManifest_Parser
 	 */
 	protected function parseSelf() {
 		// Cache the parser itself: the previous memo variable was never assigned, so the
 		// guard always passed and the file was parsed again on every call
 		static $parser = null;
 		if ($parser === null) {
 			$parser = new SS_ConfigStaticManifest_Parser(__FILE__);
 			$parser->parse();
 		}
 		return $parser;
 	}
 	/**
 	 * Each example static must be reported with the token of the visibility
 	 * keyword it was declared with, or null when none was written.
 	 */
 	public function testParsingAccessLevels() {
 		$parsed = $this->parseSelf()->getStatics();
 		// Static name => expected access token
 		$expected = array(
 			'nolevel' => null,
 			'public' => T_PUBLIC,
 			'public2' => T_PUBLIC,
 			'protected' => T_PROTECTED,
 			'protected2' => T_PROTECTED,
 			'private' => T_PRIVATE,
 			'private2' => T_PRIVATE
 		);
 		foreach($expected as $name => $token) {
 			$label = $token ? token_name($token) : 'no';
 			$this->assertEquals(
 				$token,
 				$parsed[__CLASS__][$name]['access'],
 				'Variable '.$name.' has '.$label.' access level'
 			);
 		}
 	}
 	/**
 	 * For every value kind and every declaration layout, the value extracted by
 	 * the parser must equal the value PHP itself holds in the static.
 	 */
 	public function testParsingValues() {
 		$statics = $this->parseSelf()->getStatics();
 		// Check assigning values
 		$values = array(
 			'none',
 			'null',
 			'int',
 			'float',
 			'string',
 			'array',
 			'heredoc',
 			'nowdoc'
 		);
 		$prepends = array(
 			's', // Each in its own static statement
 			'o', // All in one static statement on one line
 			'm'  // All in one static statement, but each on its own line
 		);
 		foreach ($values as $value) {
 			foreach ($prepends as $prepend) {
 				// Static names are the layout prefix followed by the value kind, e.g. "sint"
 				$var = "$prepend$value";
 				$this->assertEquals(
 					self::$$var,
 					$statics[__CLASS__][$var]['value'],
 					'Variable '.$var.' value is extracted properly'
 				);
 			}
 		}
 	}
 	/**
 	 * Comments and docblocks placed inside a static statement must not disturb
 	 * the values that are extracted.
 	 */
 	public function testIgnoreComments() {
 		$parsed = $this->parseSelf()->getStatics();
 		$names = array('commented_int', 'commented_string', 'docblocked_int', 'docblocked_string');
 		foreach($names as $name) {
 			$this->assertEquals(self::$$name, $parsed[__CLASS__][$name]['value']);
 		}
 	}
 	/**
 	 * A static declared inside a method body must not be reported as a class static.
 	 */
 	public function testIgnoresMethodStatics() {
 		$parsed = $this->parseSelf()->getStatics();
 		// @ silences the undefined-index notice for the entry that is expected to be absent
 		$entry = @$parsed[__CLASS__]['method_static'];
 		$this->assertNull($entry);
 	}
 	/**
 	 * A static method must not be reported as a class static.
 	 */
 	public function testIgnoresStaticMethods() {
 		$parsed = $this->parseSelf()->getStatics();
 		// @ silences the undefined-index notice for the entry that is expected to be absent
 		$entry = @$parsed[__CLASS__]['static_method'];
 		$this->assertNull($entry);
 	}
 }
--- a/tests/forms/RequirementsTest.php
+++ b/tests/forms/RequirementsTest.php
@ -254,13 +254,13 @@ class RequirementsTest extends SapphireTest {
 		$backend->delete_combined_files('RequirementsTest_bc.js');
 		$html = $backend->includeInHTML(false, self::$html_template);
-
+		
 		/* Javascript has correct path */
-		$this->assertTrue((bool)preg_match('/src=".*\/RequirementsTest_a\.js\?m=\d\d+&test=1&test=2&test=3/', $html),
+		$this->assertTrue((bool)preg_match('/src=".*\/RequirementsTest_a\.js\?m=\d\d+&amp;test=1&amp;test=2&amp;test=3/', $html),
 			'javascript has correct path'); 
 		/* CSS has correct path */
-		$this->assertTrue((bool)preg_match('/href=".*\/RequirementsTest_a\.css\?m=\d\d+&test=1&test=2&test=3/',$html),
+		$this->assertTrue((bool)preg_match('/href=".*\/RequirementsTest_a\.css\?m=\d\d+&amp;test=1&amp;test=2&amp;test=3/',$html),
 			'css has correct path'); 
 	}
@ -363,17 +363,17 @@ class RequirementsTest extends SapphireTest {
 		$backend->set_suffix_requirements(true);
 		$html = $backend->includeInHTML(false, $template);
 		$this->assertRegexp('/RequirementsTest_a\.js\?m=[\d]*/', $html);
-		$this->assertRegexp('/RequirementsTest_b\.js\?m=[\d]*&foo=bar&bla=blubb/', $html);
+		$this->assertRegexp('/RequirementsTest_b\.js\?m=[\d]*&amp;foo=bar&amp;bla=blubb/', $html);
 		$this->assertRegexp('/RequirementsTest_a\.css\?m=[\d]*/', $html);
-		$this->assertRegexp('/RequirementsTest_b\.css\?m=[\d]*&foo=bar&bla=blubb/', $html);
+		$this->assertRegexp('/RequirementsTest_b\.css\?m=[\d]*&amp;foo=bar&amp;bla=blubb/', $html);
 		$backend->set_suffix_requirements(false);
 		$html = $backend->includeInHTML(false, $template);
 		$this->assertNotContains('RequirementsTest_a.js=', $html);
 		$this->assertNotRegexp('/RequirementsTest_a\.js\?m=[\d]*/', $html);
-		$this->assertNotRegexp('/RequirementsTest_b\.js\?m=[\d]*&foo=bar&bla=blubb/', $html);
+		$this->assertNotRegexp('/RequirementsTest_b\.js\?m=[\d]*&amp;foo=bar&amp;bla=blubb/', $html);
 		$this->assertNotRegexp('/RequirementsTest_a\.css\?m=[\d]*/', $html);
-		$this->assertNotRegexp('/RequirementsTest_b\.css\?m=[\d]*&foo=bar&bla=blubb/', $html);
+		$this->assertNotRegexp('/RequirementsTest_b\.css\?m=[\d]*&amp;foo=bar&amp;bla=blubb/', $html);
 	}
 	public function assertFileIncluded($backend, $type, $files) {
--- a/tests/integration/HTML4ValueTest.php
+++ b/tests/integration/HTML4ValueTest.php
@ -3,10 +3,10 @@
 * @package framework
 * @subpackage tests
 */
-class SS_HTMLValueTest extends SapphireTest {
+class SS_HTML4ValueTest extends SapphireTest {
 	public function testInvalidHTMLSaving() {
-		$value = new SS_HTMLValue();
+		$value = new SS_HTML4Value();
 		$invalid = array (
 			'<p>Enclosed Value</p></p>'                              => '<p>Enclosed Value</p>',
 			'<meta content="text/html"></meta>'                      => '<meta content="text/html">',
@ -22,20 +22,15 @@ class SS_HTMLValueTest extends SapphireTest {
 	}
 	public function testUtf8Saving() {
-		$value = new SS_HTMLValue();
+		$value = new SS_HTML4Value();
 		$value->setContent('<p>ö ß ā い 家</p>');
 		$this->assertEquals('<p>ö ß ā い 家</p>', $value->getContent());
 	}
 	public function testOutputFormatting() {
 		$value = new SS_HTMLValue();
 		$value->setOutputFormatting(true);
 		$value->setContent('<meta content="text/html">');
 		$this->assertEquals('<meta content="text/html">', $value->getContent(), 'Formatted output works');
 	}
 	public function testInvalidHTMLTagNames() {
-		$value = new SS_HTMLValue();
+		$value = new SS_HTML4Value();
 		$invalid = array(
 			'<p><div><a href="test-link"></p></div>',
 			'<html><div><a href="test-link"></a></a></html_>',
@ -53,7 +48,8 @@ class SS_HTMLValueTest extends SapphireTest {
 	}
 	public function testMixedNewlines() {
-		$value = new SS_HTMLValue();
+		$value = new SS_HTML4Value();
 		$value->setContent("<p>paragraph</p>\n<ul><li>1</li>\r\n</ul>");
 		$this->assertEquals(
 			"<p>paragraph</p>\n<ul><li>1</li>\n</ul>",
--- a/tests/model/DataObjectSchemaGenerationTest.php
+++ b/tests/model/DataObjectSchemaGenerationTest.php
@ -43,9 +43,11 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		// Table will have been initially created by the $extraDataObjects setting
 		// Let's insert a new field here
-		$oldDB = DataObjectSchemaGenerationTest_DO::$db;
+		Config::nest();
-		DataObjectSchemaGenerationTest_DO::$db['SecretField'] = 'Varchar(100)';
+		Config::inst()->update('DataObjectSchemaGenerationTest_DO', 'db', array(
-		
+			'SecretField' => 'Varchar(100)'
 		));
 		// Verify that the above extra field triggered a schema update
 		$db->beginSchemaUpdate();
 		$obj = new DataObjectSchemaGenerationTest_DO();
@ -55,7 +57,7 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		$this->assertTrue($needsUpdating);
 		// Restore db configuration
-		DataObjectSchemaGenerationTest_DO::$db = $oldDB;
+		Config::unnest();
 	}
 	/**
@ -76,9 +78,12 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		$this->assertFalse($needsUpdating);
 		// Test with alternate index format, although these indexes are the same
-		$oldIndexes = DataObjectSchemaGenerationTest_IndexDO::$indexes;
+		Config::nest();
-		DataObjectSchemaGenerationTest_IndexDO::$indexes = DataObjectSchemaGenerationTest_IndexDO::$indexes_alt;
+		Config::inst()->remove('DataObjectSchemaGenerationTest_IndexDO', 'indexes');
-				
+		Config::inst()->update('DataObjectSchemaGenerationTest_IndexDO', 'indexes',
 			Config::inst()->get('DataObjectSchemaGenerationTest_IndexDO', 'indexes_alt')
 		);
 		// Verify that it still doesn't need to be recreated
 		$db->beginSchemaUpdate();
 		$obj2 = new DataObjectSchemaGenerationTest_IndexDO();
@ -88,7 +93,7 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		$this->assertFalse($needsUpdating);
 		// Restore old index format
-		DataObjectSchemaGenerationTest_IndexDO::$indexes = $oldIndexes;
+		Config::unnest();
 	}
 	/**
@ -101,9 +106,13 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		// Table will have been initially created by the $extraDataObjects setting
 		// Update the SearchFields index here
-		$oldIndexes = DataObjectSchemaGenerationTest_IndexDO::$indexes;
+		Config::nest();
-		DataObjectSchemaGenerationTest_IndexDO::$indexes['SearchFields']['value'] = '"Title"';
+		Config::inst()->update('DataObjectSchemaGenerationTest_IndexDO', 'indexes', array(
-		
+			'SearchFields' => array(
 				'value' => 'Title'
 			)
 		));
 		// Verify that the above index change triggered a schema update
 		$db->beginSchemaUpdate();
 		$obj = new DataObjectSchemaGenerationTest_IndexDO();
@ -113,7 +122,7 @@ class DataObjectSchemaGenerationTest extends SapphireTest {
 		$this->assertTrue($needsUpdating);
 		// Restore old indexes
-		DataObjectSchemaGenerationTest_IndexDO::$indexes = $oldIndexes;
+		Config::unnest();
 	}
 }
--- a/tests/model/ManyManyListTest.php
+++ b/tests/model/ManyManyListTest.php
@ -161,4 +161,53 @@ class ManyManyListTest extends SapphireTest {
 		$this->assertEquals($teamTwoID, $teamsWithoutTheCaptain->first()->ID,
 			'The ManyManyList contains the wrong team');
 	}
 	/**
 	 * removeAll() must only delete the join rows of the list it is called on:
 	 * another team's players, and the player records themselves, stay intact.
 	 * Also covers removeAll() on a filtered list.
 	 */
 	public function testRemoveAll() {
 		// Two teams that will share the same two players
 		$first = new DataObjectTest_Team();
 		$first->write();
 		$second = new DataObjectTest_Team();
 		$second->write();
 		$firstPlayers = $first->Players();
 		$secondPlayers = $second->Players();
 		$a = new DataObjectTest_Player();
 		$a->ShirtNumber = 'a';
 		$a->write();
 		$b = new DataObjectTest_Player();
 		$b->ShirtNumber = 'b';
 		$b->write();
 		$firstPlayers->add($a);
 		$firstPlayers->add($b);
 		$secondPlayers->add($a);
 		$secondPlayers->add($b);
 		$this->assertEquals(array('a', 'b'), $firstPlayers->sort('ShirtNumber')->column('ShirtNumber'));
 		$this->assertEquals(array('a', 'b'), $secondPlayers->sort('ShirtNumber')->column('ShirtNumber'));
 		// Emptying the first team's list must leave the second team's list alone
 		$firstPlayers->removeAll();
 		$this->assertEquals(0, count($firstPlayers));
 		$this->assertEquals(2, count($secondPlayers));
 		// Called again on the now-empty list, then the list is repopulated
 		$firstPlayers->removeAll();
 		$firstPlayers->add($a);
 		$firstPlayers->add($b);
 		$this->assertEquals(array('a', 'b'), $firstPlayers->sort('ShirtNumber')->column('ShirtNumber'));
 		// On a filtered list only the matching relation is removed
 		$firstPlayers->filter('ShirtNumber', 'b')->removeAll();
 		$this->assertEquals(array('a'), $firstPlayers->column('ShirtNumber'));
 		$this->assertEquals(array('a', 'b'), $secondPlayers->sort('ShirtNumber')->column('ShirtNumber'));
 		// The player records themselves must still exist
 		$this->assertNotNull(DataObjectTest_Player::get()->byID($a->ID));
 		$this->assertNotNull(DataObjectTest_Player::get()->byID($b->ID));
 	}
 }
--- a/tests/model/UnsavedRelationListTest.php
+++ b/tests/model/UnsavedRelationListTest.php
@ -157,6 +157,32 @@ class UnsavedRelationListTest extends SapphireTest {
 			array('Name' => 'C', 'Number' => 3)
 		), $object->Siblings());
 	}
 	/**
 	 * getIDList() on an unsaved relation list only reports children that have
 	 * been written, keyed by their own ID.
 	 */
 	public function testGetIDList() {
 		$parent = new UnsavedRelationListTest_DataObject();
 		$kids = $parent->Children();
 		$this->assertEquals($kids->getIDList(), array());
 		$childA = new UnsavedRelationListTest_DataObject(array('Name' => 'A'));
 		$kids->add($childA);
 		$childB = new UnsavedRelationListTest_DataObject(array('Name' => 'B'));
 		$kids->add($childB);
 		$childC = new UnsavedRelationListTest_DataObject(array('Name' => 'C'));
 		$kids->add($childC);
 		// The first child is deliberately added a second time
 		$kids->add($childA);
 		// Nothing has been written yet, so the ID list is still empty
 		$this->assertEquals($kids->getIDList(), array());
 		$childA->write();
 		$this->assertEquals($kids->getIDList(), array($childA->ID => $childA->ID));
 		$childB->write();
 		$childC->write();
 		$allIDs = array();
 		foreach(array($childA, $childB, $childC) as $written) {
 			$allIDs[$written->ID] = $written->ID;
 		}
 		$this->assertEquals($kids->getIDList(), $allIDs);
 	}
 }
 class UnsavedRelationListTest_DataObject extends DataObject implements TestOnly {
--- a/tests/view/ContentNegotiatorTest.php
+++ b/tests/view/ContentNegotiatorTest.php
@ -0,0 +1,67 @@
 <?php
 /**
  * Checks the markup rewriting applied by ContentNegotiator::xhtml() to a
  * rendered template: minimised boolean attributes and unclosed input tags.
  */
 class ContentNegotiatorTest extends SapphireTest {
 	/**
 	 * Small helper to render templates from strings.
 	 * Cloned from SSViewerTest.
 	 *
 	 * @param string $templateString Template source to render
 	 * @param mixed $data Object to render against; a new SSViewerTestFixture when omitted
 	 * @return mixed Whatever SSViewer::process() returns for the template
 	 */
 	private function render($templateString, $data = null) {
 		$t = SSViewer::fromString($templateString);
 		if(!$data) $data = new SSViewerTestFixture();
 		return $t->process($data);
 	}
 	/**
 	 * Already-valid attributes must pass through untouched, bare "selected" /
 	 * "checked" must be expanded, and input tags must be self-closed.
 	 */
 	public function testXhtmltagReplacement() {
 		$tmpl1 = '<?xml version="1.0" encoding="UTF-8"?>
 			<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
 				. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 			<html>
 				<head><% base_tag %></head>
 				<body>
 				<form action="#">
 					<select>
 							<option>aa</option>
 							<option selected = "selected">bb</option>
 							<option selected="selected">cc</option>
 							<option class="foo" selected>dd</option>
 							<option>ee</option>
 							<option selected value="">ll</option>
 					</select>
 					<input type="checkbox">ff
 					<input type="checkbox" checked = "checked">gg
 					<input type="checkbox" checked="checked">hh
 					<input class="bar" type="checkbox" checked>ii
 					<input type="checkbox" checked class="foo">jj
 					<input type="submit">
 				</form>
 				<body>
 			</html>';
 		// Check that the content negotiator converts to the equally legal formats
 		$negotiator = new ContentNegotiator();
 		$response = new SS_HTTPResponse($this->render($tmpl1));
 		$negotiator->xhtml($response);
 		// All assertions inspect the same rewritten body, so fetch it once
 		$body = $response->getBody();
 		////////////////////////
 		// XHTML select options
 		////////////////////////
 		$this->assertRegExp('/<option>aa<\/option>/', $body);
 		$this->assertRegExp('/<option selected = "selected">bb<\/option>/', $body);
 		$this->assertRegExp('/<option selected="selected">cc<\/option>/', $body);
 		// Just transform this
 		$this->assertRegExp('/<option class="foo" selected="selected">dd<\/option>/', $body);
 		$this->assertRegExp('/<option selected="selected" value="">ll<\/option>/', $body);
 		////////////////////////////////////////////////
 		// XHTML checkbox options + XHTML input closure
 		////////////////////////////////////////////////
 		$this->assertRegExp('/<input type="checkbox"\/>ff/', $body);
 		// Match the full "gg" label, like every sibling assertion does
 		$this->assertRegExp('/<input type="checkbox" checked = "checked"\/>gg/', $body);
 		$this->assertRegExp('/<input type="checkbox" checked="checked"\/>hh/', $body);
 		// Just transform this
 		$this->assertRegExp('/<input class="bar" type="checkbox" checked="checked"\/>ii/', $body);
 		$this->assertRegExp('/<input type="checkbox" checked="checked" class="foo"\/>jj/', $body);
 	}
 }
--- a/tests/view/SSViewerTest.php
+++ b/tests/view/SSViewerTest.php
@ -59,11 +59,23 @@ class SSViewerTest extends SapphireTest {
 	public function testComments() {
 		$output = $this->render(<<<SS
 This is my template<%-- this is a comment --%>This is some content<%-- this is another comment --%>Final content
 <%-- Alone multi
 	line comment --%>
 Some more content
 Mixing content and <%-- multi
 	line comment --%> Final final 
 content
 SS
 );
 		$shouldbe = <<<SS
 This is my templateThis is some contentFinal content
 Some more content
 Mixing content and  Final final 
 content
 SS;
-		$this->assertEquals("This is my templateThis is some contentFinal content", 
+		$this->assertEquals($shouldbe, $output);
 			preg_replace("/\n?<!--.*-->\n?/U",'',$output));
 	}
 	public function testBasicText() {
@ -886,6 +898,41 @@ after')
 		);
 	}
 	/**
 	 * Runs $callback while a template manifest rooted at this test's directory
 	 * is pushed and $theme is the active theme, then restores the previous
 	 * manifest and theme. An Exception thrown by the callback is re-thrown
 	 * only after that clean-up has happened.
 	 *
 	 * @param string $theme Theme name to activate for the duration of the call
 	 * @param callable $callback Code to run; invoked without arguments
 	 */
 	protected function useTestTheme($theme, $callback) {
 		global $project;
 		$testManifest = new SS_TemplateManifest(dirname(__FILE__), $project, true, true);
 		SS_TemplateLoader::instance()->pushManifest($testManifest);
 		$previousTheme = SSViewer::current_theme();
 		SSViewer::set_theme($theme);
 		// Hold on to any failure so the clean-up below always runs first
 		$failure = null;
 		try {
 			$callback();
 		} catch (Exception $caught) {
 			$failure = $caught;
 		}
 		// Remove all the test themes we created
 		SS_TemplateLoader::instance()->popManifest();
 		SSViewer::set_theme($previousTheme);
 		if ($failure !== null) {
 			throw $failure;
 		}
 	}
 	/**
 	 * With the 'layouttest' theme active, the template list decides which
 	 * Layout template gets rendered into the Page wrapper.
 	 */
 	public function testLayout() {
 		// The closure cannot use $this directly, so the test case is passed in
 		$test = $this;
 		$assertions = function() use ($test) {
 			$emptyData = new ArrayData(array());
 			$pageOnly = new SSViewer(array('Page'));
 			$test->assertEquals('Foo', $pageOnly->process($emptyData));
 			$emptyData = new ArrayData(array());
 			$withShortcodes = new SSViewer(array('Shortcodes', 'Page'));
 			$test->assertEquals('[file_link]', $withShortcodes->process($emptyData));
 		};
 		$this->useTestTheme('layouttest', $assertions);
 	}
 	/**
 	 * @covers SSViewer::get_themes()
 	 */
@ -917,7 +964,7 @@ after')
 		// Remove all the test themes we created
 		Filesystem::removeFolder($testThemeBaseDir);
 	}
-	
+
 	public function testRewriteHashlinks() {
 		$oldRewriteHashLinks = SSViewer::getOption('rewriteHashlinks');
 		SSViewer::setOption('rewriteHashlinks', true);
@ -1171,14 +1218,14 @@ class SSViewerTest_GlobalProvider implements TemplateGlobalProvider, TestOnly {
 	public static function get_template_global_variables() {
 		return array(
-			'SSViewerTest_GlobalHTMLFragment' => array('method' => 'get_html'),
+			'SSViewerTest_GlobalHTMLFragment' => array('method' => 'get_html', 'casting' => 'HTMLText'),
-			'SSViewerTest_GlobalHTMLEscaped' => array('method' => 'get_html', 'casting' => 'Varchar'),
+			'SSViewerTest_GlobalHTMLEscaped' => array('method' => 'get_html'),
 			'SSViewerTest_GlobalAutomatic',
 			'SSViewerTest_GlobalReferencedByString' => 'get_reference',
 			'SSViewerTest_GlobalReferencedInArray' => array('method' => 'get_reference'),
-			'SSViewerTest_GlobalThatTakesArguments' => array('method' => 'get_argmix')
+			'SSViewerTest_GlobalThatTakesArguments' => array('method' => 'get_argmix', 'casting' => 'HTMLText')
 		);
 	}
--- a/tests/view/themes/layouttest/Layout/Page.ss
+++ b/tests/view/themes/layouttest/Layout/Page.ss
@ -0,0 +1 @@
 Foo
--- a/tests/view/themes/layouttest/Layout/Shortcodes.ss
+++ b/tests/view/themes/layouttest/Layout/Shortcodes.ss
@ -0,0 +1 @@
 [file_link]
--- a/tests/view/themes/layouttest/Page.ss
+++ b/tests/view/themes/layouttest/Page.ss
@ -0,0 +1 @@
 $Layout
--- a/thirdparty/html5lib/HTML5/Data.php
+++ b/thirdparty/html5lib/HTML5/Data.php
@ -1,114 +0,0 @@
 <?php
 // warning: this file is encoded in UTF-8!
 class HTML5_Data
 {
    // Numeric character reference replacement table: maps a referenced
    // codepoint to the codepoint that should be used in its place.
    // (Candidate for a .ser file, or for storing UTF-8 bytes directly.)
    protected static $realCodepointTable = array(
        0x00 => 0xFFFD, // REPLACEMENT CHARACTER
        0x0D => 0x000A, // LINE FEED (LF)
        0x80 => 0x20AC, // EURO SIGN ('€')
        0x81 => 0x0081, // <control>
        0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('‚')
        0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ')
        0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„')
        0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…')
        0x86 => 0x2020, // DAGGER ('†')
        0x87 => 0x2021, // DOUBLE DAGGER ('‡')
        0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
        0x89 => 0x2030, // PER MILLE SIGN ('‰')
        0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š')
        0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
        0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ')
        0x8D => 0x008D, // <control>
        0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž')
        0x8F => 0x008F, // <control>
        0x90 => 0x0090, // <control>
        0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('‘')
        0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('’')
        0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK ('“')
        0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”')
        0x95 => 0x2022, // BULLET ('•')
        0x96 => 0x2013, // EN DASH ('–')
        0x97 => 0x2014, // EM DASH ('—')
        0x98 => 0x02DC, // SMALL TILDE ('˜')
        0x99 => 0x2122, // TRADE MARK SIGN ('™')
        0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š')
        0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
        0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ')
        0x9D => 0x009D, // <control>
        0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž')
        0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
    );
    // Lazily filled by getNamedCharacterReferences()
    protected static $namedCharacterReferences;
    protected static $namedCharacterReferenceMaxLength;
    /**
     * Looks up the replacement codepoint for a numeric character reference.
     *
     * @param int $ref Codepoint as written in the reference
     * @return int|false Replacement codepoint, or false when the table has
     *                   no entry for $ref
     */
    public static function getRealCodepoint($ref) {
        return isset(self::$realCodepointTable[$ref])
            ? self::$realCodepointTable[$ref]
            : false;
    }
    /**
     * Returns the named character reference data, unserialized from
     * named-character-references.ser next to this file on first use and
     * kept in a static property afterwards.
     */
    public static function getNamedCharacterReferences() {
        if (!self::$namedCharacterReferences) {
            $serialized = file_get_contents(dirname(__FILE__) . '/named-character-references.ser');
            self::$namedCharacterReferences = unserialize($serialized);
        }
        return self::$namedCharacterReferences;
    }
    /**
     * Encodes a Unicode codepoint as a sequence of UTF-8 bytes.
     * No range validation is performed: values above 0x10FFFF, negative
     * values and surrogates are encoded as-is rather than rejected.
     * @note Derived from HTML Purifier, which took it from Feyd
     *       (public domain).
     *
     * @param int $code Codepoint to encode
     * @return string One to four bytes
     */
    public static function utf8chr($code) {
        // One byte: plain ASCII
        if ($code < 0x80) {
            return chr($code);
        }
        // Every multi-byte form ends with a continuation byte carrying
        // the low six bits
        $tail = chr(($code & 0x3F) | 0x80);
        // Two bytes
        if ($code < 0x800) {
            return chr((($code & 0x7FF) >> 6) | 0xC0) . $tail;
        }
        // Three and four byte forms share the second-to-last byte as well
        $tail = chr((($code & 0xFC0) >> 6) | 0x80) . $tail;
        if ($code < 0x10000) {
            return chr((($code >> 12) & 0x0F) | 0xE0) . $tail;
        }
        // Four bytes
        return chr((($code >> 18) & 0x07) | 0xF0)
            . chr((($code >> 12) & 0x3F) | 0x80)
            . $tail;
    }
 }
--- a/thirdparty/html5lib/HTML5/InputStream.php
+++ b/thirdparty/html5lib/HTML5/InputStream.php
@ -1,284 +0,0 @@
 <?php
 /*
 Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be included
 in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 // Some conventions:
 // /* */ indicates verbatim text from the HTML 5 specification
 // // indicates regular comments
 class HTML5_InputStream {
    /**
     * The string data we're parsing.
     */
    private $data;
    /**
     * The current integer byte position we are in $data
     */
    private $char;
    /**
     * Length of $data; when $char === $EOF, we are at the end-of-file.
     */
    private $EOF;
    /**
     * Parse errors.
     */
    public $errors = array();
    /**
     * Normalises the input (invalid UTF-8 dropped, BOM stripped, NULs
     * replaced, newlines unified) and records a parse error for every NUL
     * and every disallowed codepoint found.
     *
     * @param $data Data to parse
     * @throws Exception when the iconv extension is not loaded
     */
    public function __construct($data) {
        /* Given an encoding, the bytes in the input stream must be
        converted to Unicode characters for the tokeniser, as
        described by the rules for that encoding, except that the
        leading U+FEFF BYTE ORDER MARK character, if any, must not
        be stripped by the encoding layer (it is stripped by the rule below).
        Bytes or sequences of bytes in the original byte stream that
        could not be converted to Unicode characters must be converted
        to U+FFFD REPLACEMENT CHARACTER code points. */
        // XXX currently assuming input data is UTF-8; once we
        // build encoding detection this will no longer be the case
        //
        // We previously had an mbstring implementation here, but that
        // implementation is heavily non-conforming, so it's been
        // omitted. Only iconv is honoured below, whatever the exception
        // message says.
        if (extension_loaded('iconv')) {
            // non-conforming
            $data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
        } else {
            // we can make a conforming native implementation
            throw new Exception('Not implemented, please install mbstring or iconv');
        }
        /* One leading U+FEFF BYTE ORDER MARK character must be
        ignored if any are present. */
        if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
            $data = substr($data, 3);
        }
        /* All U+0000 NULL characters in the input must be replaced
        by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
        characters is a parse error. */
        for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
            $this->errors[] = array(
                'type' => HTML5_Tokenizer::PARSEERROR,
                'data' => 'null-character'
            );
        }
        /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
        (LF) characters are treated specially. Any CR characters
        that are followed by LF characters must be removed, and any
        CR characters not followed by LF characters must be converted
        to LF characters. Thus, newlines in HTML DOMs are represented
        by LF characters, and there are never any CR characters in the
        input to the tokenization stage. */
        $data = str_replace(
            array(
                "\0",
                "\r\n",
                "\r"
            ),
            array(
                "\xEF\xBF\xBD",
                "\n",
                "\n"
            ),
            $data
        );
        /* Any occurrences of any characters in the ranges U+0001 to
        U+0008, U+000B,  U+000E to U+001F,  U+007F  to U+009F,
        U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
        characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
        U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
        U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
        U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
        U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
        U+10FFFF are parse errors. (These are all control characters
        or permanently undefined Unicode characters.) */
        // Check PCRE is loaded.
        if (extension_loaded('pcre')) {
            $count = preg_match_all(
                '/(?:
                    [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B,  U+000E to U+001F and U+007F
                |
                    \xC2[\x80-\x9F] # U+0080 to U+009F
                |
                    \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
                |
                    \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
                |
                    \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
                |
                    [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
                )/x',
                $data,
                $matches
            );
            for ($i = 0; $i < $count; $i++) {
                $this->errors[] = array(
                    'type' => HTML5_Tokenizer::PARSEERROR,
                    'data' => 'invalid-codepoint'
                );
            }
        } else {
            // XXX: Need non-PCRE impl, probably using substr_count
        }
        $this->data = $data;
        $this->char = 0;
        $this->EOF  = strlen($data);
    }
    /**
     * Returns the current line that the tokenizer is at.
     *
     * @return int 1-based line number
     */
    public function getCurrentLine() {
        // Check the string isn't empty
        if($this->EOF) {
            // Count the newlines already consumed (the position is capped
            // at the data length) and add one to make the result 1-based.
            return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
        } else {
            // If the string is empty, we are on the first line (sorta).
            return 1;
        }
    }
    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * @return int Number of characters consumed since the last newline
     */
    public function getColumnOffset() {
        // Nothing consumed yet: we are at column 0. Returning early also
        // keeps the offset computed below (char - 1 - length) inside the
        // string; at position 0 it would point before the first byte, which
        // strrpos() rejects (a warning before PHP 8, a ValueError since).
        if ($this->char <= 0) {
            return 0;
        }
        // strrpos is weird, and the offset needs to be negative for what we
        // want (i.e., the last \n before $this->char). This needs to not have
        // one (to make it point to the next character, the one we want the
        // position of) added to it because strrpos's behaviour includes the
        // final offset byte.
        $lastLine = strrpos($this->data, "\n", $this->char - 1 - strlen($this->data));
        // However, for here we want the length up until the next byte to be
        // processed, so add one to the current byte ($this->char).
        if($lastLine !== false) {
            $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
        } else {
            $findLengthOf = substr($this->data, 0, $this->char);
        }
        // Get the length for the string we need.
        if(extension_loaded('iconv')) {
            return iconv_strlen($findLengthOf, 'utf-8');
        } elseif(extension_loaded('mbstring')) {
            return mb_strlen($findLengthOf, 'utf-8');
        } elseif(extension_loaded('xml')) {
            return strlen(utf8_decode($findLengthOf));
        } else {
            $count = count_chars($findLengthOf);
            // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
            // 0x33 = 0xF4 - 0xC2 + 1 (one added to get inclusive range)
            return array_sum(array_slice($count, 0, 0x80)) +
                   array_sum(array_slice($count, 0xC2, 0x33));
        }
    }
    /**
     * Retrieve the currently consumed character.
     * @note This performs bounds checking. The position is advanced even
     *       when the end of the data has already been reached.
     *
     * @return string|false The next byte, or false at end-of-file
     */
    public function char() {
        return ($this->char++ < $this->EOF)
            ? $this->data[$this->char - 1]
            : false;
    }
    /**
     * Get all characters until EOF.
     * @note This performs bounds checking
     *
     * @return string|false The rest of the data, or false when none is left
     */
    public function remainingChars() {
        if($this->char < $this->EOF) {
            $data = substr($this->data, $this->char);
            $this->char = $this->EOF;
            return $data;
        } else {
            return false;
        }
    }
    /**
     * Matches as far as possible until we reach a certain set of bytes
     * and returns the matched substring.
     * @param $bytes Bytes to match.
     * @param $max Optional upper bound on the number of bytes matched
     * @return string|false Matched bytes (possibly empty), or false at EOF
     */
    public function charsUntil($bytes, $max = null) {
        if ($this->char < $this->EOF) {
            if ($max === 0 || $max) {
                $len = strcspn($this->data, $bytes, $this->char, $max);
            } else {
                $len = strcspn($this->data, $bytes, $this->char);
            }
            $string = (string) substr($this->data, $this->char, $len);
            $this->char += $len;
            return $string;
        } else {
            return false;
        }
    }
    /**
     * Matches as far as possible with a certain set of bytes
     * and returns the matched substring.
     * @param $bytes Bytes to match.
     * @param $max Optional upper bound on the number of bytes matched
     * @return string|false Matched bytes (possibly empty), or false at EOF
     */
    public function charsWhile($bytes, $max = null) {
        if ($this->char < $this->EOF) {
            if ($max === 0 || $max) {
                $len = strspn($this->data, $bytes, $this->char, $max);
            } else {
                $len = strspn($this->data, $bytes, $this->char);
            }
            $string = (string) substr($this->data, $this->char, $len);
            $this->char += $len;
            return $string;
        } else {
            return false;
        }
    }
    /**
     * Unconsume one character. Does nothing once the position has moved
     * past the end of the data.
     */
    public function unget() {
        if ($this->char <= $this->EOF) {
            $this->char--;
        }
    }
 }
--- a/thirdparty/html5lib/HTML5/Parser.php
+++ b/thirdparty/html5lib/HTML5/Parser.php
@ -1,36 +0,0 @@
 <?php
 require_once dirname(__FILE__) . '/Data.php';
 require_once dirname(__FILE__) . '/InputStream.php';
 require_once dirname(__FILE__) . '/TreeBuilder.php';
 require_once dirname(__FILE__) . '/Tokenizer.php';
 /**
  * Public entry point for HTML5 parsing: thin static wrappers that drive an
  * HTML5_Tokenizer and hand back whatever its save() produces.
  */
 class HTML5_Parser
 {
    /**
     * Parses a full HTML document.
     * @param $text HTML text to parse
     * @param $builder Custom builder implementation
     * @return Parsed HTML as DOMDocument
     */
    public static function parse($text, $builder = null) {
        $documentTokenizer = new HTML5_Tokenizer($text, $builder);
        $documentTokenizer->parse();
        $parsed = $documentTokenizer->save();
        return $parsed;
    }
    /**
     * Parses an HTML fragment.
     * @param $text HTML text to parse
     * @param $context String name of context element to pretend parsing is in.
     * @param $builder Custom builder implementation
     * @return Parsed HTML as DOMDocument
     */
    public static function parseFragment($text, $context = null, $builder = null) {
        $fragmentTokenizer = new HTML5_Tokenizer($text, $builder);
        $fragmentTokenizer->parseFragment($context);
        $parsed = $fragmentTokenizer->save();
        return $parsed;
    }
 }
--- a/thirdparty/html5lib/HTML5/Tokenizer.php
+++ b/thirdparty/html5lib/HTML5/Tokenizer.php
@ -1,2422 +0,0 @@
 <?php
 /*
 Copyright 2007 Jeroen van der Meer <http://jero.net/>
 Copyright 2008 Edward Z. Yang <http://htmlpurifier.org/>
 Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be included
 in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 // Some conventions:
 // /* */ indicates verbatim text from the HTML 5 specification
 // // indicates regular comments
 // all flags are in hyphenated form
 class HTML5_Tokenizer {
    /**
     * Points to an InputStream object.
     */
    protected $stream;
    /**
     * Tree builder that the tokenizer emits token to.
     */
    private $tree;
    /**
     * Current content model we are parsing as.
     */
    protected $content_model;
    /**
     * Current token that is being built, but not yet emitted. Also
     * is the last token emitted, if applicable.
     */
    protected $token;
    // These are constants describing the content model
    const PCDATA    = 0;
    const RCDATA    = 1;
    const CDATA     = 2;
    const PLAINTEXT = 3;
    // These are constants describing tokens
    // XXX should probably be moved somewhere else, probably the
    // HTML5 class.
    const DOCTYPE        = 0;
    const STARTTAG       = 1;
    const ENDTAG         = 2;
    const COMMENT        = 3;
    const CHARACTER      = 4;
    const SPACECHARACTER = 5;
    const EOF            = 6;
    const PARSEERROR     = 7;
    // These are constants representing bunches of characters.
    const ALPHA       = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    const UPPER_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
    const LOWER_ALPHA = 'abcdefghijklmnopqrstuvwxyz';
    const DIGIT       = '0123456789';
    const HEX         = '0123456789ABCDEFabcdef';
    const WHITESPACE  = "\t\n\x0c ";
    /**
     * @param $data Data to parse
     */
    public function __construct($data, $builder = null) {
        $this->stream = new HTML5_InputStream($data);
        if (!$builder) $this->tree = new HTML5_TreeBuilder;
        else $this->tree = $builder;
        $this->content_model = self::PCDATA;
    }
    public function parseFragment($context = null) {
        $this->tree->setupContext($context);
        if ($this->tree->content_model) {
            $this->content_model = $this->tree->content_model;
            $this->tree->content_model = null;
        }
        $this->parse();
    }
    // XXX maybe convert this into an iterator? regardless, this function
    // and the save function should go into a Parser facade of some sort
    /**
     * Performs the actual parsing of the document.
     */
    public function parse() {
        // Current state
        $state = 'data';
        // This is used to avoid having to have look-behind in the data state.
        $lastFourChars = '';
        /**
         * Escape flag as specified by the HTML5 specification: "used to
         * control the behavior of the tokeniser. It is either true or
         * false, and initially must be set to the false state."
         */
        $escape = false;
        //echo "\n\n";
        while($state !== null) {
            /*echo $state . ' ';
            switch ($this->content_model) {
                case self::PCDATA: echo 'PCDATA'; break;
                case self::RCDATA: echo 'RCDATA'; break;
                case self::CDATA: echo 'CDATA'; break;
                case self::PLAINTEXT: echo 'PLAINTEXT'; break;
            }
            if ($escape) echo " escape";
            echo "\n";*/
            switch($state) {
                case 'data':
                    /* Consume the next input character */
                    $char = $this->stream->char();
                    $lastFourChars .= $char;
                    if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
                    // see below for meaning
                    $hyp_cond = 
                        !$escape &&
                        (
                            $this->content_model === self::RCDATA ||
                            $this->content_model === self::CDATA
                        );
                    $amp_cond =
                        !$escape &&
                        (
                            $this->content_model === self::PCDATA ||
                            $this->content_model === self::RCDATA
                        );
                    $lt_cond =
                        $this->content_model === self::PCDATA ||
                        (
                            (
                                $this->content_model === self::RCDATA ||
                                $this->content_model === self::CDATA
                             ) &&
                             !$escape
                        );
                    $gt_cond = 
                        $escape &&
                        (
                            $this->content_model === self::RCDATA ||
                            $this->content_model === self::CDATA
                        );
                    if($char === '&' && $amp_cond) {
                        /* U+0026 AMPERSAND (&)
                        When the content model flag is set to one of the PCDATA or RCDATA
                        states and the escape flag is false: switch to the
                        character reference data state. Otherwise: treat it as per
                        the "anything else" entry below. */
                        $state = 'character reference data';
                    } elseif(
                        $char === '-' &&
                        $hyp_cond &&
                        $lastFourChars === '<!--'
                    ) {
                        /*
                        U+002D HYPHEN-MINUS (-)
                        If the content model flag is set to either the RCDATA state or
                        the CDATA state, and the escape flag is false, and there are at
                        least three characters before this one in the input stream, and the
                        last four characters in the input stream, including this one, are
                        U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
                        and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
                        $escape = true;
                        /* In any case, emit the input character as a character token. Stay
                        in the data state. */
                        $this->emitToken(array(
                            'type' => self::CHARACTER,
                            'data' => '-'
                        ));
                        // We do the "any case" part as part of "anything else".
                    /* U+003C LESS-THAN SIGN (<) */
                    } elseif($char === '<' && $lt_cond) {
                        /* When the content model flag is set to the PCDATA state: switch
                        to the tag open state.
                        When the content model flag is set to either the RCDATA state or
                        the CDATA state and the escape flag is false: switch to the tag
                        open state.
                        Otherwise: treat it as per the "anything else" entry below. */
                        $state = 'tag open';
                    /* U+003E GREATER-THAN SIGN (>) */
                    } elseif(
                        $char === '>' &&
                        $gt_cond &&
                        substr($lastFourChars, 1) === '-->'
                    ) {
                        /* If the content model flag is set to either the RCDATA state or
                        the CDATA state, and the escape flag is true, and the last three
                        characters in the input stream including this one are U+002D
                        HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
                        set the escape flag to false. */
                        $escape = false;
                        /* In any case, emit the input character as a character token.
                        Stay in the data state. */
                        $this->emitToken(array(
                            'type' => self::CHARACTER,
                            'data' => '>'
                        ));
                        // We do the "any case" part as part of "anything else".
                    } elseif($char === false) {
                        /* EOF
                        Emit an end-of-file token. */
                        $state = null;
                        $this->tree->emitToken(array(
                            'type' => self::EOF
                        ));
                    } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        // Directly after emitting a token you switch back to the "data
                        // state". At that point spaceCharacters are important so they are
                        // emitted separately.
                        $chars = $this->stream->charsWhile(self::WHITESPACE);
                        $this->emitToken(array(
                            'type' => self::SPACECHARACTER,
                            'data' => $char . $chars
                        ));
                        $lastFourChars .= $chars;
                        if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
                    } else {
                        /* Anything else
                        THIS IS AN OPTIMIZATION: Get as many characters as possible
                        that would otherwise each be treated as a character token and
                        emit them as a single character token. Stay in the data state. */
                        $mask = '';
                        if ($hyp_cond) $mask .= '-';
                        if ($amp_cond) $mask .= '&';
                        if ($lt_cond)  $mask .= '<';
                        if ($gt_cond)  $mask .= '>';
                        if ($mask === '') {
                            $chars = $this->stream->remainingChars();
                        } else {
                            $chars = $this->stream->charsUntil($mask);
                        }
                        $this->emitToken(array(
                            'type' => self::CHARACTER,
                            'data' => $char . $chars
                        ));
                        $lastFourChars .= $chars;
                        if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
                        $state = 'data';
                    }
                break;
                case 'character reference data':
                    /* Per the note carried over from the specification, this
                    state cannot be reached while the content model flag is
                    set to the CDATA state. */

                    /* Try to consume a character reference, with no additional
                    allowed character, and emit the outcome as one character
                    token. The fallback to a literal U+0026 AMPERSAND when
                    nothing could be consumed is taken care of while the
                    reference is being consumed, so it is not repeated here. */
                    $entity = $this->consumeCharacterReference();
                    $this->emitToken(array('type' => self::CHARACTER, 'data' => $entity));

                    /* Always return to the data state afterwards. */
                    $state = 'data';
                break;
                case 'tag open':
                    // The character right after the "<" is consumed once here and
                    // shared by every content model branch below.
                    $char = $this->stream->char();

                    switch($this->content_model) {
                        case self::RCDATA:
                        case self::CDATA:
                            /* RCDATA / CDATA content model: only a U+002F
                            SOLIDUS (/) is significant and leads to the close
                            tag open state. Anything else means the "<" was
                            plain text: emit a U+003C LESS-THAN SIGN character
                            token and reconsume the current input character in
                            the data state. */
                            if($char !== '/') {
                                $this->emitToken(array(
                                    'type' => self::CHARACTER,
                                    'data' => '<'
                                ));
                                $this->stream->unget();
                                $state = 'data';
                            } else {
                                $state = 'close tag open';
                            }
                        break;

                        case self::PCDATA:
                            /* PCDATA content model: dispatch on the character
                            that was consumed above. All tests below are
                            mutually exclusive. */
                            if(
                                ('A' <= $char && $char <= 'Z') ||
                                ('a' <= $char && $char <= 'z')
                            ) {
                                /* U+0041..U+005A or U+0061..U+007A (ASCII letter)
                                Create a new start tag token whose tag name is the
                                lowercase form of the input character (a no-op for
                                letters that are already lowercase), then switch
                                to the tag name state. The token is not emitted
                                yet; further details are filled in first. */
                                $this->token = array(
                                    'name'  => strtolower($char),
                                    'type'  => self::STARTTAG,
                                    'attr'  => array()
                                );
                                $state = 'tag name';
                            } elseif($char === '/') {
                                /* U+002F SOLIDUS (/)
                                Switch to the close tag open state. */
                                $state = 'close tag open';
                            } elseif($char === '!') {
                                /* U+0021 EXCLAMATION MARK (!)
                                Switch to the markup declaration open state. */
                                $state = 'markup declaration open';
                            } elseif($char === '?') {
                                /* U+003F QUESTION MARK (?)
                                Parse error. Start a comment token holding the
                                "?" and switch to the bogus comment state. */
                                $this->emitToken(array(
                                    'type' => self::PARSEERROR,
                                    'data' => 'expected-tag-name-but-got-question-mark'
                                ));
                                $this->token = array(
                                    'data' => '?',
                                    'type' => self::COMMENT
                                );
                                $state = 'bogus comment';
                            } elseif($char === '>') {
                                /* U+003E GREATER-THAN SIGN (>)
                                Parse error. Emit "<" and ">" (here as a single
                                two-character token) and switch to the data
                                state. */
                                $this->emitToken(array(
                                    'type' => self::PARSEERROR,
                                    'data' => 'expected-tag-name-but-got-right-bracket'
                                ));
                                $this->emitToken(array(
                                    'type' => self::CHARACTER,
                                    'data' => '<>'
                                ));
                                $state = 'data';
                            } else {
                                /* Anything else (including EOF)
                                Parse error. Emit a U+003C LESS-THAN SIGN character
                                token and reconsume the current input character
                                in the data state. */
                                $this->emitToken(array(
                                    'type' => self::PARSEERROR,
                                    'data' => 'expected-tag-name'
                                ));
                                $this->emitToken(array(
                                    'type' => self::CHARACTER,
                                    'data' => '<'
                                ));
                                $this->stream->unget();
                                $state = 'data';
                            }
                        break;
                    }
                break;
                case 'close tag open':
                    if (
                        $this->content_model === self::RCDATA ||
                        $this->content_model === self::CDATA
                    ) {
                        /* If the content model flag is set to the RCDATA or CDATA
                        states... */
                        // Read the candidate tag name (lowercased for the ASCII
                        // case-insensitive comparison) and peek at the character
                        // that follows it without consuming it.
                        $name = strtolower($this->stream->charsWhile(self::ALPHA));
                        $following = $this->stream->char();
                        $this->stream->unget();
                        if (
                            !$this->token ||
                            $this->token['name'] !== $name ||
                            // Strict comparison is required: EOF is represented by
                            // boolean false, and a loose in_array() would treat the
                            // character "0" as equal to it ("0" == false), so
                            // "</name0" would wrongly be accepted as an end tag.
                            !in_array(
                                $following,
                                array("\x09", "\x0A", "\x0C", "\x20", "\x3E", "\x2F", false),
                                true
                            )
                        ) {
                            /* if no start tag token has ever been emitted by this instance
                            of the tokenizer (fragment case), or, if the next few
                            characters do not match the tag name of the last start tag
                            token emitted (compared in an ASCII case-insensitive manner),
                            or if they do but they are not immediately followed by one of
                            the following characters:
                                * U+0009 CHARACTER TABULATION
                                * U+000A LINE FEED (LF)
                                * U+000C FORM FEED (FF)
                                * U+0020 SPACE
                                * U+003E GREATER-THAN SIGN (>)
                                * U+002F SOLIDUS (/)
                                * EOF
                            ...then emit a U+003C LESS-THAN SIGN character token, a
                            U+002F SOLIDUS character token, and switch to the data
                            state to process the next input character. */
                            // We also need to emit $name now we've consumed that, as we
                            // know it'll just be emitted as a character token.
                            $this->emitToken(array(
                                'type' => self::CHARACTER,
                                'data' => '</' . $name
                            ));
                            $state = 'data';
                        } else {
                            // This matches what would happen if we actually did the
                            // otherwise below (but we can't because we've consumed too
                            // much).
                            // Start the end tag token with the name we already have.
                            $this->token = array(
                                'name'  => $name,
                                'type'  => self::ENDTAG
                            );
                            // Change to tag name state.
                            $state = 'tag name';
                        }
                    } elseif ($this->content_model === self::PCDATA) {
                        /* Otherwise, if the content model flag is set to the PCDATA
                        state [...]: */
                        $char = $this->stream->char();
                        if ('A' <= $char && $char <= 'Z') {
                            /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
                            Create a new end tag token, set its tag name to the lowercase version
                            of the input character (add 0x0020 to the character's code point), then
                            switch to the tag name state. (Don't emit the token yet; further details
                            will be filled in before it is emitted.) */
                            $this->token = array(
                                'name'  => strtolower($char),
                                'type'  => self::ENDTAG
                            );
                            $state = 'tag name';
                        } elseif ('a' <= $char && $char <= 'z') {
                            /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
                            Create a new end tag token, set its tag name to the
                            input character, then switch to the tag name state.
                            (Don't emit the token yet; further details will be
                            filled in before it is emitted.) */
                            $this->token = array(
                                'name'  => $char,
                                'type'  => self::ENDTAG
                            );
                            $state = 'tag name';
                        } elseif($char === '>') {
                            /* U+003E GREATER-THAN SIGN (>)
                            Parse error. Switch to the data state. */
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'expected-closing-tag-but-got-right-bracket'
                            ));
                            $state = 'data';
                        } elseif($char === false) {
                            /* EOF
                            Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
                            SOLIDUS character token. Reconsume the EOF character in the data state. */
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'expected-closing-tag-but-got-eof'
                            ));
                            $this->emitToken(array(
                                'type' => self::CHARACTER,
                                'data' => '</'
                            ));
                            $this->stream->unget();
                            $state = 'data';
                        } else {
                            /* Parse error. Switch to the bogus comment state. */
                            // The offending character becomes the first character
                            // of the bogus comment's data.
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'expected-closing-tag-but-got-char'
                            ));
                            $this->token = array(
                                'data' => $char,
                                'type' => self::COMMENT
                            );
                            $state = 'bogus comment';
                        }
                    }
                break;
                case 'tag name':
                    /* Consume the next input character: */
                    $char = $this->stream->char();

                    if($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-tag-name'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        The tag is complete: emit the current tag token and
                        switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === '/') {
                        /* U+002F SOLIDUS (/)
                        Switch to the self-closing start tag state. */
                        $state = 'self-closing start tag';
                    } elseif(in_array($char, array("\t", "\n", "\x0c", ' '), true)) {
                        /* U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                        U+000C FORM FEED (FF) or U+0020 SPACE
                        The tag name is finished: switch to the before
                        attribute name state. */
                        $state = 'before attribute name';
                    } else {
                        /* Everything left belongs to the tag name. Instead of
                        looping once per character, a whole run is consumed
                        and appended in one go. */
                        if('A' <= $char && $char <= 'Z') {
                            /* U+0041 through U+005A (uppercase ASCII letter)
                            Take the following characters matching
                            self::UPPER_ALPHA as well and append the
                            lowercase version of the run to the tag name. */
                            $chars = $this->stream->charsWhile(self::UPPER_ALPHA);
                            $this->token['name'] .= strtolower($char . $chars);
                        } else {
                            /* Anything else
                            Append the character verbatim, together with
                            everything up to the next character that one of the
                            other branches has to handle (whitespace, "/", ">"
                            or a character in self::UPPER_ALPHA). */
                            $chars = $this->stream->charsUntil("\t\n\x0C />" . self::UPPER_ALPHA);
                            $this->token['name'] .= $char . $chars;
                        }
                        /* Stay in the tag name state. */
                        $state = 'tag name';
                    }
                break;
                case 'before attribute name':
                    /* Consume the next input character: */
                    $char = $this->stream->char();

                    if($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-attribute-name-but-got-eof'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } elseif(in_array($char, array("\t", "\n", "\x0c", ' '), true)) {
                        /* U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                        U+000C FORM FEED (FF) or U+0020 SPACE
                        Skip it: stay in the before attribute name state. */
                        $state = 'before attribute name';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current tag token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === '/') {
                        /* U+002F SOLIDUS (/)
                        Switch to the self-closing start tag state. */
                        $state = 'self-closing start tag';
                    } else {
                        /* Every remaining character starts a new attribute.
                        U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
                        U+003C LESS-THAN SIGN (<) and U+003D EQUALS SIGN (=)
                        are additionally a parse error, but are otherwise
                        treated like any other character. */
                        if(in_array($char, array('"', "'", '<', '='), true)) {
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'invalid-character-in-attribute-name'
                            ));
                        }

                        /* Start a new attribute in the current tag token with
                        an empty value. Its name is the current input
                        character, lowercased when it is an uppercase ASCII
                        letter (U+0041 through U+005A). Switch to the attribute
                        name state. */
                        $this->token['attr'][] = array(
                            'name'  => ('A' <= $char && $char <= 'Z') ? strtolower($char) : $char,
                            'value' => ''
                        );
                        $state = 'attribute name';
                    }
                break;
                case 'attribute name':
                    // Consume the next input character:
                    $char = $this->stream->char();

                    // Each branch either leaves the attribute name state or
                    // appends a run of characters to the name of the attribute
                    // most recently added to the current tag token.
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                        U+000A LINE FEED (LF)
                        U+000C FORM FEED (FF)
                        U+0020 SPACE
                        Switch to the after attribute name state. */
                        $state = 'after attribute name';
                    } elseif($char === '/') {
                        /* U+002F SOLIDUS (/)
                        Switch to the self-closing start tag state. */
                        $state = 'self-closing start tag';
                    } elseif($char === '=') {
                        /* U+003D EQUALS SIGN (=)
                        Switch to the before attribute value state. */
                        $state = 'before attribute value';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current tag token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif('A' <= $char && $char <= 'Z') {
                        /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
                        Append the lowercase version of the current input
                        character (add 0x0020 to the character's code point) to
                        the current attribute's name. Stay in the attribute name
                        state. */
                        $chars = $this->stream->charsWhile(self::UPPER_ALPHA);
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['name'] .= strtolower($char . $chars);
                        $state = 'attribute name';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-attribute-name'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* U+0022 QUOTATION MARK (")
                           U+0027 APOSTROPHE (')
                           U+003C LESS-THAN SIGN (<)
                        Parse error. Treat it as per the "anything else"
                        entry below. */
                        if($char === '"' || $char === "'" || $char === '<') {
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'invalid-character-in-attribute-name'
                            ));
                        }
                        /* Anything else
                        Append the current input character to the current attribute's name.
                        Stay in the attribute name state. */
                        // The bulk read must stop at every character that needs
                        // its own handling on the next pass. That includes the
                        // three parse-error characters above: '<' has to be in
                        // the mask alongside '"' and "'", otherwise a '<' later
                        // in the name would be appended silently without its
                        // parse error being reported.
                        $chars = $this->stream->charsUntil("\t\n\x0C /=><\"'" . self::UPPER_ALPHA);
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['name'] .= $char . $chars;
                        $state = 'attribute name';
                    }
                    /* When the user agent leaves the attribute name state
                    (and before emitting the tag token, if appropriate), the
                    complete attribute's name must be compared to the other
                    attributes on the same token; if there is already an
                    attribute on the token with the exact same name, then this
                    is a parse error and the new attribute must be dropped, along
                    with the value that gets associated with it (if any). */
                    // this might be implemented in the emitToken method
                break;
                case 'after attribute name':
                    // Consume the next input character:
                    $char = $this->stream->char();

                    if($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-end-of-tag-but-got-eof'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } elseif(in_array($char, array("\t", "\n", "\x0c", ' '), true)) {
                        /* U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
                        U+000C FORM FEED (FF) or U+0020 SPACE
                        Skip it: stay in the after attribute name state. */
                        $state = 'after attribute name';
                    } elseif($char === '=') {
                        /* U+003D EQUALS SIGN (=)
                        Switch to the before attribute value state. */
                        $state = 'before attribute value';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current tag token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === '/') {
                        /* U+002F SOLIDUS (/)
                        Switch to the self-closing start tag state. */
                        $state = 'self-closing start tag';
                    } else {
                        /* Every remaining character starts a new attribute.
                        U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE (') and
                        U+003C LESS-THAN SIGN (<) are additionally a parse
                        error, but are otherwise treated like any other
                        character. */
                        if(in_array($char, array('"', "'", '<'), true)) {
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'invalid-character-after-attribute-name'
                            ));
                        }

                        /* Start a new attribute in the current tag token with
                        an empty value. Its name is the current input
                        character, lowercased when it is an uppercase ASCII
                        letter (U+0041 through U+005A). Switch to the attribute
                        name state. */
                        $this->token['attr'][] = array(
                            'name'  => ('A' <= $char && $char <= 'Z') ? strtolower($char) : $char,
                            'value' => ''
                        );
                        $state = 'attribute name';
                    }
                break;
                case 'before attribute value':
                    // Consume the next input character:
                    $char = $this->stream->char();
                    // this is an optimized conditional
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                        U+000A LINE FEED (LF)
                        U+000C FORM FEED (FF)
                        U+0020 SPACE
                        Stay in the before attribute value state. */
                        $state = 'before attribute value';
                    } elseif($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Switch to the attribute value (double-quoted) state. */
                        $state = 'attribute value (double-quoted)';
                    } elseif($char === '&') {
                        /* U+0026 AMPERSAND (&)
                        Switch to the attribute value (unquoted) state and reconsume
                        this input character. */
                        $this->stream->unget();
                        $state = 'attribute value (unquoted)';
                    } elseif($char === '\'') {
                        /* U+0027 APOSTROPHE (')
                        Switch to the attribute value (single-quoted) state. */
                        $state = 'attribute value (single-quoted)';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Emit the current tag token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-attribute-value-but-got-right-bracket'
                        ));
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-attribute-value-but-got-eof'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* U+003D EQUALS SIGN (=)
                         * U+003C LESS-THAN SIGN (<)
                        Parse error. Treat it as per the "anything else" entry below. */
                        if($char === '=' || $char === '<') {
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'equals-in-unquoted-attribute-value'
                            ));
                        }
                        /* Anything else
                        Append the current input character to the current attribute's value.
                        Switch to the attribute value (unquoted) state. */
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['value'] .= $char;
                        $state = 'attribute value (unquoted)';
                    }
                break;
                case 'attribute value (double-quoted)':
                    // Consume the next input character:
                    $char = $this->stream->char();
                    if($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Switch to the after attribute value (quoted) state. */
                        $state = 'after attribute value (quoted)';
                    } elseif($char === '&') {
                        /* U+0026 AMPERSAND (&)
                        Switch to the character reference in attribute value
                        state, with the additional allowed character
                        being U+0022 QUOTATION MARK ("). */
                        $this->characterReferenceInAttributeValue('"');
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-attribute-value-double-quote'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current attribute's value.
                        Stay in the attribute value (double-quoted) state. */
                        $chars = $this->stream->charsUntil('"&');
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['value'] .= $char . $chars;
                        $state = 'attribute value (double-quoted)';
                    }
                break;
                case 'attribute value (single-quoted)':
                    /* Tokenizer state reached after the opening apostrophe of an
                    attribute value; accumulates the value into the most recently
                    added attribute of the current tag token. */
                    // Consume the next input character:
                    $char = $this->stream->char();
                    if($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Switch to the after attribute value (quoted) state. */
                        $state = 'after attribute value (quoted)';
                    } elseif($char === '&') {
                        /* U+0026 AMPERSAND (&)
                        Switch to the character reference in attribute value
                        state, with the additional allowed character
                        being U+0027 APOSTROPHE ('). */
                        $this->characterReferenceInAttributeValue("'");
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-attribute-value-single-quote'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current attribute's value.
                        Stay in the attribute value (single-quoted) state.
                        The run of following characters up to the next apostrophe or
                        ampersand is appended in the same step, since none of them
                        would be handled differently here. */
                        $chars = $this->stream->charsUntil("'&");
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['value'] .= $char . $chars;
                        $state = 'attribute value (single-quoted)';
                    }
                break;
                case 'attribute value (unquoted)':
                    /* Tokenizer state for an attribute value written without
                    quotes; accumulates the value into the most recently added
                    attribute of the current tag token. */
                    // Consume the next input character:
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                        U+000A LINE FEED (LF)
                        U+000C FORM FEED (FF)
                        U+0020 SPACE
                        Switch to the before attribute name state. */
                        $state = 'before attribute name';
                    } elseif($char === '&') {
                        /* U+0026 AMPERSAND (&)
                        Switch to the character reference in attribute value
                        state, with the additional allowed character being
                        U+003E GREATER-THAN SIGN (>). */
                        $this->characterReferenceInAttributeValue('>');
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current tag token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-attribute-value-no-quotes'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* U+0022 QUOTATION MARK (")
                           U+0027 APOSTROPHE (')
                           U+003C LESS-THAN SIGN (<)
                           U+003D EQUALS SIGN (=)
                        Parse error. Treat it as per the "anything else"
                        entry below. */
                        if($char === '"' || $char === "'" || $char === '=' || $char === '<') {
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'unexpected-character-in-unquoted-attribute-value'
                            ));
                        }
                        /* Anything else
                        Append the current input character to the current attribute's value.
                        Stay in the attribute value (unquoted) state.
                        The bulk read below must stop at every character this state
                        treats specially, including '<', so that each one comes back
                        through $char on a later pass and gets its parse error instead
                        of being silently swallowed mid-run. The resulting attribute
                        value is the same either way. */
                        $chars = $this->stream->charsUntil("\t\n\x0c &>\"'=<");
                        $last = count($this->token['attr']) - 1;
                        $this->token['attr'][$last]['value'] .= $char . $chars;
                        $state = 'attribute value (unquoted)';
                    }
                break;
                case 'after attribute value (quoted)':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Switch to the before attribute name state. */
                        $state = 'before attribute name';
                    } elseif ($char === '/') {
                        /* U+002F SOLIDUS (/)
                        Switch to the self-closing start tag state. */
                        $state = 'self-closing start tag';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current tag token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-EOF-after-attribute-value'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Reconsume the character in the before attribute
                        name state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-character-after-attribute-value'
                        ));
                        $this->stream->unget();
                        $state = 'before attribute name';
                    }
                break;
                case 'self-closing start tag':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Set the self-closing flag of the current tag token.
                        Emit the current tag token. Switch to the data state. */
                        // not sure if this is the name we want
                        $this->token['self-closing'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Reconsume the EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-eof-after-self-closing'
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Reconsume the character in the before attribute name state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-character-after-self-closing'
                        ));
                        $this->stream->unget();
                        $state = 'before attribute name';
                    }
                break;
                case 'bogus comment':
                    /* (This can only happen if the content model flag is set to the PCDATA state.) */
                    /* Consume every character up to the first U+003E GREATER-THAN SIGN
                    character (>) or the end of the file (EOF), whichever comes first. Emit
                    a comment token whose data is the concatenation of all the characters
                    starting from and including the character that caused the state machine
                    to switch into the bogus comment state, up to and including the last
                    consumed character before the U+003E character, if any, or up to the
                    end of the file otherwise. (If the comment was started by the end of
                    the file (EOF), the token is empty.) */
                    $this->token['data'] .= (string) $this->stream->charsUntil('>');
                    $this->stream->char();
                    $this->emitToken($this->token);
                    /* Switch to the data state. */
                    $state = 'data';
                break;
                case 'markup declaration open':
                    /* Decides what kind of construct begins here: a comment
                    ("--"), a DOCTYPE, or something unrecognised that is treated
                    as a bogus comment. */
                    // Look ahead for hyphens (the 2 presumably caps how many
                    // are read -- confirm against charsWhile()).
                    $hyphens = $this->stream->charsWhile('-', 2);
                    if ($hyphens === '-') {
                        // A lone hyphen does not open a comment: push it back so
                        // it becomes part of whatever is read next.
                        $this->stream->unget();
                    }
                    if ($hyphens !== '--') {
                        // Not a comment opener: read a run of letters
                        // (presumably at most seven) to test for "DOCTYPE".
                        $alpha = $this->stream->charsWhile(self::ALPHA, 7);
                    }
                    /* If the next two characters are both U+002D HYPHEN-MINUS (-)
                    characters, consume those two characters, create a comment token whose
                    data is the empty string, and switch to the comment state. */
                    if($hyphens === '--') {
                        $state = 'comment start';
                        $this->token = array(
                            'data' => '',
                            'type' => self::COMMENT
                        );
                    /* Otherwise if the next seven characters are a case-insensitive match
                    for the word "DOCTYPE", then consume those characters and switch to the
                    DOCTYPE state. */
                    } elseif(strtoupper($alpha) === 'DOCTYPE') {
                        $state = 'DOCTYPE';
                    // XXX not implemented
                    /* Otherwise, if the insertion mode is "in foreign content"
                    and the current node is not an element in the HTML namespace
                    and the next seven characters are an ASCII case-sensitive
                    match for the string "[CDATA[" (the five uppercase letters
                    "CDATA" with a U+005B LEFT SQUARE BRACKET character before
                    and after), then consume those characters and switch to the
                    CDATA section state (which is unrelated to the content model
                    flag's CDATA state). */
                    /* Otherwise, it is a parse error. Switch to the bogus comment state.
                    The next character that is consumed, if any, is the first character
                    that will be in the comment. */
                    } else {
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-dashes-or-doctype'
                        ));
                        // The letters already consumed while testing for
                        // "DOCTYPE" are kept as the start of the comment data.
                        $this->token = array(
                            'data' => (string) $alpha,
                            'type' => self::COMMENT
                        );
                        $state = 'bogus comment';
                    }
                break;
                case 'comment start':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Switch to the comment start dash state. */
                        $state = 'comment start dash';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Emit the comment token. Switch to the
                        data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'incorrect-comment'
                        ));
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the input character to the comment token's
                        data. Switch to the comment state. */
                        $this->token['data'] .= $char;
                        $state = 'comment';
                    }
                break;
                case 'comment start dash':
                    /* Reached when a hyphen immediately follows the comment
                    opener; that hyphen has not yet been added to the comment
                    token's data. */
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Switch to the comment end state */
                        $state = 'comment end';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Emit the comment token. Switch to the
                        data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'incorrect-comment'
                        ));
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append a U+002D HYPHEN-MINUS (-) character and the input
                        character to the comment token's data. Switch to the
                        comment state. */
                        $this->token['data'] .= '-' . $char;
                        $state = 'comment';
                    }
                break;
                case 'comment':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Switch to the comment end dash state */
                        $state = 'comment end dash';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the EOF character
                        in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the input character to the comment token's data. Stay in
                        the comment state. */
                        $chars = $this->stream->charsUntil('-');
                        $this->token['data'] .= $char . $chars;
                    }
                break;
                case 'comment end dash':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Switch to the comment end state  */
                        $state = 'comment end';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the EOF character
                        in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment-end-dash'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append a U+002D HYPHEN-MINUS (-) character and the input
                        character to the comment token's data. Switch to the comment state. */
                        $this->token['data'] .= '-'.$char;
                        $state = 'comment';
                    }
                break;
                case 'comment end':
                    /* Reached after "--" inside a comment; the two hyphens have
                    not yet been added to the comment token's data. */
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the comment token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Parse error. Append a U+002D HYPHEN-MINUS (-) character
                        to the comment token's data. Stay in the comment end
                        state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-dash-after-double-dash-in-comment'
                        ));
                        $this->token['data'] .= '-';
                    } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Parse error. Append two U+002D HYPHEN-MINUS (-)
                        characters and the input character to the comment
                        token's data. Switch to the comment end space state.
                        (FORM FEED is "\x0c"; "\x0a" is LINE FEED again and
                        would leave form feeds to the "anything else" branch.) */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-space-after-double-dash-in-comment'
                        ));
                        $this->token['data'] .= '--' . $char;
                        $state = 'comment end space';
                    } elseif($char === '!') {
                        /* U+0021 EXCLAMATION MARK (!)
                        Parse error. Switch to the comment end bang state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-bang-after-double-dash-in-comment'
                        ));
                        $state = 'comment end bang';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment-double-dash'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Append two U+002D HYPHEN-MINUS (-)
                        characters and the input character to the comment token's
                        data. Switch to the comment state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-comment'
                        ));
                        $this->token['data'] .= '--'.$char;
                        $state = 'comment';
                    }
                break;
                case 'comment end bang':
                    /* Reached after "--!" inside a comment; none of those three
                    characters has been added to the comment token's data yet. */
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the comment token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === "-") {
                        /* U+002D HYPHEN-MINUS (-)
                        Append "--!" to the comment token's data. Switch to the
                        comment end dash state. */
                        $this->token['data'] .= '--!';
                        $state = 'comment end dash';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-comment-end-bang'
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append "--!" and the input character to the comment
                        token's data. Switch to the comment state. */
                        $this->token['data'] .= '--!' . $char;
                        $state = 'comment';
                    }
                break;
                case 'comment end space':
                    /* Reached after "--" followed by whitespace inside a comment;
                    the state that switches here has already appended those
                    characters to the comment token's data. */
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the comment token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === '-') {
                        /* U+002D HYPHEN-MINUS (-)
                        Switch to the comment end dash state. */
                        $state = 'comment end dash';
                    } elseif ($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Append the input character to the comment token's data.
                        Stay in the comment end space state. */
                        $this->token['data'] .= $char;
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Emit the comment token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-eof-in-comment-end-space',
                        ));
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the input character to the comment token's data.
                        Switch to the comment state. */
                        $this->token['data'] .= $char;
                        $state = 'comment';
                    }
                break;
                case 'DOCTYPE':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Switch to the before DOCTYPE name state. */
                        $state = 'before DOCTYPE name';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Create a new DOCTYPE token. Set its
                        force-quirks flag to on. Emit the token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'need-space-after-doctype-but-got-eof'
                        ));
                        $this->emitToken(array(
                            'name' => '',
                            'type' => self::DOCTYPE,
                            'force-quirks' => true,
                            'error' => true
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Reconsume the current character in the
                        before DOCTYPE name state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'need-space-after-doctype'
                        ));
                        $this->stream->unget();
                        $state = 'before DOCTYPE name';
                    }
                break;
                case 'before DOCTYPE name':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the before DOCTYPE name state. */
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Create a new DOCTYPE token. Set its
                        force-quirks flag to on. Emit the token. Switch to the
                        data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-doctype-name-but-got-right-bracket'
                        ));
                        $this->emitToken(array(
                            'name' => '',
                            'type' => self::DOCTYPE,
                            'force-quirks' => true,
                            'error' => true
                        ));
                        $state = 'data';
                    } elseif('A' <= $char && $char <= 'Z') {
                        /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
                        Create a new DOCTYPE token. Set the token's name to the
                        lowercase version of the input character (add 0x0020 to
                        the character's code point). Switch to the DOCTYPE name
                        state. */
                        $this->token = array(
                            'name' => strtolower($char),
                            'type' => self::DOCTYPE,
                            'error' => true
                        );
                        $state = 'DOCTYPE name';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Create a new DOCTYPE token. Set its
                        force-quirks flag to on. Emit the token. Reconsume the
                        EOF character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'expected-doctype-name-but-got-eof'
                        ));
                        $this->emitToken(array(
                            'name' => '',
                            'type' => self::DOCTYPE,
                            'force-quirks' => true,
                            'error' => true
                        ));
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Create a new DOCTYPE token. Set the token's name to the
                        current input character. Switch to the DOCTYPE name state. */
                        $this->token = array(
                            'name' => $char,
                            'type' => self::DOCTYPE,
                            'error' => true
                        );
                        $state = 'DOCTYPE name';
                    }
                break;
                case 'DOCTYPE name':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Switch to the after DOCTYPE name state. */
                        $state = 'after DOCTYPE name';
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current DOCTYPE token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif('A' <= $char && $char <= 'Z') {
                        /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
                        Append the lowercase version of the input character
                        (add 0x0020 to the character's code point) to the current
                        DOCTYPE token's name. Stay in the DOCTYPE name state. */
                        $this->token['name'] .= strtolower($char);
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype-name'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current
                        DOCTYPE token's name. Stay in the DOCTYPE name state. */
                        $this->token['name'] .= $char;
                    }
                    // XXX this is probably some sort of quirks mode designation,
                    // check tree-builder to be sure. In general 'error' needs
                    // to be specc'ified, this probably means removing it at the end
                    $this->token['error'] = ($this->token['name'] === 'HTML')
                        ? false
                        : true;
                break;
                case 'after DOCTYPE name':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the after DOCTYPE name state. */
                    } elseif($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current DOCTYPE token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else */
                        $nextSix = strtoupper($char . $this->stream->charsWhile(self::ALPHA, 5));
                        if ($nextSix === 'PUBLIC') {
                            /* If the next six characters are an ASCII
                            case-insensitive match for the word "PUBLIC", then
                            consume those characters and switch to the before
                            DOCTYPE public identifier state. */
                            $state = 'before DOCTYPE public identifier';
                        } elseif ($nextSix === 'SYSTEM') {
                            /* Otherwise, if the next six characters are an ASCII
                            case-insensitive match for the word "SYSTEM", then
                            consume those characters and switch to the before
                            DOCTYPE system identifier state. */
                            $state = 'before DOCTYPE system identifier';
                        } else {
                            /* Otherwise, this is the parse error. Set the DOCTYPE
                            token's force-quirks flag to on. Switch to the bogus
                            DOCTYPE state. */
                            $this->emitToken(array(
                                'type' => self::PARSEERROR,
                                'data' => 'expected-space-or-right-bracket-in-doctype'
                            ));
                            $this->token['force-quirks'] = true;
                            $this->token['error'] = true;
                            $state = 'bogus DOCTYPE';
                        }
                    }
                break;
                case 'before DOCTYPE public identifier':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the before DOCTYPE public identifier state. */
                    } elseif ($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Set the DOCTYPE token's public identifier to the empty
                        string (not missing), then switch to the DOCTYPE public
                        identifier (double-quoted) state. */
                        $this->token['public'] = '';
                        $state = 'DOCTYPE public identifier (double-quoted)';
                    } elseif ($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Set the DOCTYPE token's public identifier to the empty
                        string (not missing), then switch to the DOCTYPE public
                        identifier (single-quoted) state. */
                        $this->token['public'] = '';
                        $state = 'DOCTYPE public identifier (single-quoted)';
                    } elseif ($char === '>') {
                        /* Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-end-of-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* Parse error. Set the DOCTYPE token's force-quirks
                        flag to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Switch to the bogus DOCTYPE state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $state = 'bogus DOCTYPE';
                    }
                break;
                case 'DOCTYPE public identifier (double-quoted)':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Switch to the after DOCTYPE public identifier state. */
                        $state = 'after DOCTYPE public identifier';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-end-of-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current
                        DOCTYPE token's public identifier. Stay in the DOCTYPE
                        public identifier (double-quoted) state. */
                        $this->token['public'] .= $char;
                    }
                break;
                case 'DOCTYPE public identifier (single-quoted)':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Switch to the after DOCTYPE public identifier state. */
                        $state = 'after DOCTYPE public identifier';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-end-of-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current
                        DOCTYPE token's public identifier. Stay in the DOCTYPE
                        public identifier (single-quoted) state. */
                        $this->token['public'] .= $char;
                    }
                break;
                case 'after DOCTYPE public identifier':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the after DOCTYPE public identifier state. */
                    } elseif ($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Set the DOCTYPE token's system identifier to the
                        empty string (not missing), then switch to the DOCTYPE
                        system identifier (double-quoted) state. */
                        $this->token['system'] = '';
                        $state = 'DOCTYPE system identifier (double-quoted)';
                    } elseif ($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Set the DOCTYPE token's system identifier to the
                        empty string (not missing), then switch to the DOCTYPE
                        system identifier (single-quoted) state. */
                        $this->token['system'] = '';
                        $state = 'DOCTYPE system identifier (single-quoted)';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current DOCTYPE token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* Parse error. Set the DOCTYPE token's force-quirks
                        flag to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Switch to the bogus DOCTYPE state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $state = 'bogus DOCTYPE';
                    }
                break;
                case 'before DOCTYPE system identifier':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the before DOCTYPE system identifier state. */
                    } elseif ($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Set the DOCTYPE token's system identifier to the empty
                        string (not missing), then switch to the DOCTYPE system
                        identifier (double-quoted) state. */
                        $this->token['system'] = '';
                        $state = 'DOCTYPE system identifier (double-quoted)';
                    } elseif ($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Set the DOCTYPE token's system identifier to the empty
                        string (not missing), then switch to the DOCTYPE system
                        identifier (single-quoted) state. */
                        $this->token['system'] = '';
                        $state = 'DOCTYPE system identifier (single-quoted)';
                    } elseif ($char === '>') {
                        /* Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* Parse error. Set the DOCTYPE token's force-quirks
                        flag to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Switch to the bogus DOCTYPE state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $state = 'bogus DOCTYPE';
                    }
                break;
                case 'DOCTYPE system identifier (double-quoted)':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '"') {
                        /* U+0022 QUOTATION MARK (")
                        Switch to the after DOCTYPE system identifier state. */
                        $state = 'after DOCTYPE system identifier';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-end-of-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current
                        DOCTYPE token's system identifier. Stay in the DOCTYPE
                        system identifier (double-quoted) state. */
                        $this->token['system'] .= $char;
                    }
                break;
                case 'DOCTYPE system identifier (single-quoted)':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === "'") {
                        /* U+0027 APOSTROPHE (')
                        Switch to the after DOCTYPE system identifier state. */
                        $state = 'after DOCTYPE system identifier';
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Switch to the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-end-of-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* EOF
                        Parse error. Set the DOCTYPE token's force-quirks flag
                        to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Append the current input character to the current
                        DOCTYPE token's system identifier. Stay in the DOCTYPE
                        system identifier (single-quoted) state. */
                        $this->token['system'] .= $char;
                    }
                break;
                case 'after DOCTYPE system identifier':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
                        /* U+0009 CHARACTER TABULATION
                           U+000A LINE FEED (LF)
                           U+000C FORM FEED (FF)
                           U+0020 SPACE
                        Stay in the after DOCTYPE system identifier state. */
                    } elseif ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the current DOCTYPE token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif ($char === false) {
                        /* Parse error. Set the DOCTYPE token's force-quirks
                        flag to on. Emit that DOCTYPE token. Reconsume the EOF
                        character in the data state. */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'eof-in-doctype'
                        ));
                        $this->token['force-quirks'] = true;
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Parse error. Switch to the bogus DOCTYPE state.
                        (This does not set the DOCTYPE token's force-quirks
                        flag to on.) */
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'unexpected-char-in-doctype'
                        ));
                        $state = 'bogus DOCTYPE';
                    }
                break;
                case 'bogus DOCTYPE':
                    /* Consume the next input character: */
                    $char = $this->stream->char();
                    if ($char === '>') {
                        /* U+003E GREATER-THAN SIGN (>)
                        Emit the DOCTYPE token. Switch to the data state. */
                        $this->emitToken($this->token);
                        $state = 'data';
                    } elseif($char === false) {
                        /* EOF
                        Emit the DOCTYPE token. Reconsume the EOF character in
                        the data state. */
                        $this->emitToken($this->token);
                        $this->stream->unget();
                        $state = 'data';
                    } else {
                        /* Anything else
                        Stay in the bogus DOCTYPE state. */
                    }
                break;
                // case 'cdataSection':
            }
        }
    }
    /**
     * Hands back the saved (serialized) form of the tree.
     *
     * This is a thin pass-through: the actual work is done by the save()
     * method of the object held in $this->tree, and its return value is
     * passed on to the caller unchanged.
     *
     * @return mixed Whatever $this->tree->save() produces.
     */
    public function save() {
        $saved = $this->tree->save();
        return $saved;
    }
    /**
     * Accessor for the input stream used by this object.
     *
     * @return mixed The value currently stored in $this->stream.
     */
    public function stream() {
        $inputStream = $this->stream;
        return $inputStream;
    }
    private function consumeCharacterReference($allowed = false, $inattr = false) {
        // This goes quite far against spec, and is far closer to the Python
        // impl., mainly because we don't do the large unconsuming the spec
        // requires.
        // All consumed characters.
        $chars = $this->stream->char();
        /* This section defines how to consume a character
        reference. This definition is used when parsing character
        references in text and in attributes.
        The behavior depends on the identity of the next character
        (the one immediately after the U+0026 AMPERSAND character): */
        if (
            $chars[0] === "\x09" ||
            $chars[0] === "\x0A" ||
            $chars[0] === "\x0C" ||
            $chars[0] === "\x20" ||
            $chars[0] === '<' ||
            $chars[0] === '&' ||
            $chars === false ||
            $chars[0] === $allowed
        ) {
            /* U+0009 CHARACTER TABULATION
               U+000A LINE FEED (LF)
               U+000C FORM FEED (FF)
               U+0020 SPACE
               U+003C LESS-THAN SIGN
               U+0026 AMPERSAND
               EOF
               The additional allowed character, if there is one
            Not a character reference. No characters are consumed,
            and nothing is returned. (This is not an error, either.) */
            // We already consumed, so unconsume.
            $this->stream->unget();
            return '&';
        } elseif ($chars[0] === '#') {
            /* Consume the U+0023 NUMBER SIGN. */
            // Um, yeah, we already did that.
            /* The behavior further depends on the character after
            the U+0023 NUMBER SIGN: */
            $chars .= $this->stream->char();
            if (isset($chars[1]) && ($chars[1] === 'x' || $chars[1] === 'X')) {
                /* U+0078 LATIN SMALL LETTER X
                   U+0058 LATIN CAPITAL LETTER X */
                /* Consume the X. */
                // Um, yeah, we already did that.
                /* Follow the steps below, but using the range of
                characters U+0030 DIGIT ZERO through to U+0039 DIGIT
                NINE, U+0061 LATIN SMALL LETTER A through to U+0066
                LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
                A, through to U+0046 LATIN CAPITAL LETTER F (in other
                words, 0123456789, ABCDEF, abcdef). */
                $char_class = self::HEX;
                /* When it comes to interpreting the
                number, interpret it as a hexadecimal number. */
                $hex = true;
            } else {
                /* Anything else */
                // Unconsume because we shouldn't have consumed this.
                $chars = $chars[0];
                $this->stream->unget();
                /* Follow the steps below, but using the range of
                characters U+0030 DIGIT ZERO through to U+0039 DIGIT
                NINE (i.e. just 0123456789). */
                $char_class = self::DIGIT;
                /* When it comes to interpreting the number,
                interpret it as a decimal number. */
                $hex = false;
            }
            /* Consume as many characters as match the range of characters given above. */
            $consumed = $this->stream->charsWhile($char_class);
            if ($consumed === '' || $consumed === false) {
                /* If no characters match the range, then don't consume
                any characters (and unconsume the U+0023 NUMBER SIGN
                character and, if appropriate, the X character). This
                is a parse error; nothing is returned. */
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'expected-numeric-entity'
                ));
                return '&' . $chars;
            } else {
                /* Otherwise, if the next character is a U+003B SEMICOLON,
                consume that too. If it isn't, there is a parse error. */
                if ($this->stream->char() !== ';') {
                    $this->stream->unget();
                    $this->emitToken(array(
                        'type' => self::PARSEERROR,
                        'data' => 'numeric-entity-without-semicolon'
                    ));
                }
                /* If one or more characters match the range, then take
                them all and interpret the string of characters as a number
                (either hexadecimal or decimal as appropriate). */
                $codepoint = $hex ? hexdec($consumed) : (int) $consumed;
                /* If that number is one of the numbers in the first column
                of the following table, then this is a parse error. Find the
                row with that number in the first column, and return a
                character token for the Unicode character given in the
                second column of that row. */
                $new_codepoint = HTML5_Data::getRealCodepoint($codepoint);
                if ($new_codepoint) {
                    $this->emitToken(array(
                        'type' => self::PARSEERROR,
                        'data' => 'illegal-windows-1252-entity'
                    ));
                    return HTML5_Data::utf8chr($new_codepoint);
                } else {
                    /* Otherwise, if the number is greater than 0x10FFFF, then 
                     * this is a parse error. Return a U+FFFD REPLACEMENT 
                     * CHARACTER. */
                    if ($codepoint > 0x10FFFF) {
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'overlong-character-entity' // XXX probably not correct
                        ));
                        return "\xEF\xBF\xBD";
                    }
                    /* Otherwise, return a character token for the Unicode 
                     * character whose code point is that number.  If the 
                     * number is in the range 0x0001 to 0x0008,    0x000E to 
                     * 0x001F,  0x007F  to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to 
                     * 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 
                     * 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 
                     * 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 
                     * 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 
                     * 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 
                     * 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 
                     * or 0x10FFFF, then this is a parse error. */
                    // && has higher precedence than ||
                    if (
                        $codepoint >= 0x0000 && $codepoint <= 0x0008 ||
                        $codepoint === 0x000B ||
                        $codepoint >= 0x000E && $codepoint <= 0x001F ||
                        $codepoint >= 0x007F && $codepoint <= 0x009F ||
                        $codepoint >= 0xD800 && $codepoint <= 0xDFFF ||
                        $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF ||
                        ($codepoint & 0xFFFE) === 0xFFFE ||
                        $codepoint == 0x10FFFF || $codepoint == 0x10FFFE
                    ) {
                        $this->emitToken(array(
                            'type' => self::PARSEERROR,
                            'data' => 'illegal-codepoint-for-numeric-entity'
                        ));
                    }
                    return HTML5_Data::utf8chr($codepoint);
                }
            }
        } else {
            /* Anything else */
            /* Consume the maximum number of characters possible,
            with the consumed characters matching one of the
            identifiers in the first column of the named character
            references table (in a case-sensitive manner). */
            // What we actually do here is consume as much as we can while it
            // matches the start of one of the identifiers in the first column.
            $refs = HTML5_Data::getNamedCharacterReferences();
            // Get the longest string which is the start of an identifier
            // ($chars) as well as the longest identifier which matches ($id)
            // and its codepoint ($codepoint).
            $codepoint = false;
            $char = $chars;
            while ($char !== false && isset($refs[$char])) {
                $refs = $refs[$char];
                if (isset($refs['codepoint'])) {
                    $id = $chars;
                    $codepoint = $refs['codepoint'];
                }
                $chars .= $char = $this->stream->char();
            }
            // Unconsume the one character we just took which caused the while
            // statement to fail. This could be anything and could cause state
            // changes (as if it matches the while loop it must be
            // alphanumeric so we can just concat it to whatever we get later).
            $this->stream->unget();
            if ($char !== false) {
                $chars = substr($chars, 0, -1);
            }
            /* If no match can be made, then this is a parse error.
            No characters are consumed, and nothing is returned. */
            if (!$codepoint) {
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'expected-named-entity'
                ));
                return '&' . $chars;
            }
            /* If the last character matched is not a U+003B SEMICOLON
            (;), there is a parse error. */
            $semicolon = true;
            if (substr($id, -1) !== ';') {
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'named-entity-without-semicolon'
                ));
                $semicolon = false;
            }
            /* If the character reference is being consumed as part of
            an attribute, and the last character matched is not a
            U+003B SEMICOLON (;), and the next character is in the
            range U+0030 DIGIT ZERO to U+0039 DIGIT NINE, U+0041
            LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z,
            or U+0061 LATIN SMALL LETTER A to U+007A LATIN SMALL LETTER Z,
            then, for historical reasons, all the characters that were
            matched after the U+0026 AMPERSAND (&) must be unconsumed,
            and nothing is returned. */
            if ($inattr && !$semicolon) {
                // The next character is either the next character in $chars or in the stream.
                if (strlen($chars) > strlen($id)) {
                    $next = substr($chars, strlen($id), 1);
                } else {
                    $next = $this->stream->char();
                    $this->stream->unget();
                }
                if (
                    '0' <= $next && $next <= '9' ||
                    'A' <= $next && $next <= 'Z' ||
                    'a' <= $next && $next <= 'z'
                ) {
                    return '&' . $chars;
                }
            }
            /* Otherwise, return a character token for the character
            corresponding to the character reference name (as given
            by the second column of the named character references table). */
            return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id));
        }
    }
    /**
     * Consumes a character reference that occurs inside an attribute value
     * and appends the outcome to the value of the last attribute of the
     * token currently being built.
     *
     * @param mixed $allowed Passed through unchanged as the first argument
     *                       of consumeCharacterReference().
     */
    private function characterReferenceInAttributeValue($allowed = false) {
        /* Attempt to consume a character reference. */
        $entity = $this->consumeCharacterReference($allowed, true);
        /* If nothing is returned, append a U+0026 AMPERSAND
        character to the current attribute's value.
        Otherwise, append the returned character token to the
        current attribute's value. */
        // The string "0" is falsy in PHP but is a legitimate result (a
        // reference that decodes to the digit zero, e.g. "&#48;"), so it
        // must not be mistaken for "nothing returned".
        $char = ($entity || $entity === '0')
            ? $entity
            : '&';
        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;
        /* Finally, switch back to the attribute value state that you
        were in when you were switched into this state. */
    }
    /**
     * Runs the tokenizer-level sanity checks on a token and then passes it
     * on to the tree builder.
     *
     * @param array $token       Token to emit; its 'type' key is inspected.
     * @param bool  $checkStream When true, errors queued on the input stream
     *                           are emitted before the token itself.
     * @param bool  $dry         When true, the checks run but the token is
     *                           not handed to the tree builder.
     */
    protected function emitToken($token, $checkStream = true, $dry = false) {
        if ($checkStream) {
            // Drain the input stream's error queue first. The nested calls
            // disable the stream check so only this loop empties the queue.
            while ($this->stream->errors) {
                $streamError = array_shift($this->stream->errors);
                $this->emitToken($streamError, false);
            }
        }

        $type = $token['type'];

        if ($type === self::ENDTAG) {
            // End tags may not carry attributes: one parse error each.
            if (!empty($token['attr'])) {
                $remaining = count($token['attr']);
                while ($remaining-- > 0) {
                    $this->emitToken(array(
                        'type' => self::PARSEERROR,
                        'data' => 'attributes-in-end-tag'
                    ));
                }
            }
            // Nor may they be self-closing.
            if (!empty($token['self-closing'])) {
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'self-closing-flag-on-end-tag',
                ));
            }
        }

        if ($type === self::STARTTAG) {
            // Report every attribute whose name was already seen on this
            // tag. (This could be changed to actually pass the tree-builder
            // a hash.)
            $seen = array();
            foreach ($token['attr'] as $attribute) {
                $name = $attribute['name'];
                if (!isset($seen[$name])) {
                    $seen[$name] = $attribute['value'];
                    continue;
                }
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'duplicate-attribute',
                ));
            }
        }

        if (!$dry) {
            // the current structure of attributes is not a terribly good one
            $this->tree->emitToken($token);
            // An integer content model on the tree builder is taken over by
            // the tokenizer and then cleared on the tree builder.
            if (is_int($this->tree->content_model)) {
                $this->content_model = $this->tree->content_model;
                $this->tree->content_model = null;
                return;
            }
        }

        // No content model was taken over: an end tag resets to PCDATA.
        if ($type === self::ENDTAG) {
            $this->content_model = self::PCDATA;
        }
    }
 }
--- a/thirdparty/html5lib/HTML5/TreeBuilder.php
+++ b/thirdparty/html5lib/HTML5/TreeBuilder.php
@ -1,3841 +0,0 @@
 <?php
 /*
 Copyright 2007 Jeroen van der Meer <http://jero.net/>
 Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be included
 in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 // Tags for FIX ME!!!: (in order of priority)
 //      XXX - should be fixed NAO!
 //      XERROR - with regards to parse errors
 //      XSCRIPT - with regards to scripting mode
 //      XENCODING - with regards to encoding (for reparsing tests)
 //      XDOM - DOM specific code (tagName is explicitly not marked).
 //          this is not (yet) in helper functions.
 class HTML5_TreeBuilder {
    // Stack of open elements; elements are appended as they are opened.
    public $stack = array();
    // NOTE(review): not used in the visible part of this class; presumably
    // the context element for fragment parsing - confirm against callers.
    public $context;
    // When this holds an integer, the tokenizer's emitToken() copies it
    // into its own content model and resets this property to null.
    public $content_model;
    // Current insertion mode (one of the "Tree construction modes"
    // constants); starts as self::INITIAL.
    private $mode;
    // NOTE(review): presumably the insertion mode to return to after a
    // temporary mode switch - confirm where it is assigned.
    private $original_mode;
    // NOTE(review): usage not visible here - confirm.
    private $secondary_mode;
    // The DOMDocument being built; created in the constructor.
    private $dom;
    // Whether or not normal insertion of nodes should actually foster
    // parent (used in one case in spec)
    private $foster_parent = false;
    // NOTE(review): presumably the list of active formatting elements;
    // self::MARKER entries are placed in it.
    private $a_formatting  = array();
    // The head element, once a "head" start tag has been inserted.
    private $head_pointer = null;
    // NOTE(review): presumably the current form element - confirm.
    private $form_pointer = null;
    private $flag_frameset_ok = true;
    private $flag_force_quirks = false;
    // Set to true when the current token was ignored; reset to false at
    // the start of every emitToken() call.
    private $ignored = false;
    // One of NO_QUIRKS, QUIRKS_MODE or LIMITED_QUIRKS_MODE once decided;
    // null until then.
    private $quirks_mode = null;
    // this gets to 2 when we want to ignore the next lf character, and
    // is decremented at the beginning of each processed token (this way,
    // code can check for (bool)$ignore_lf_token, but it phases out
    // appropriately)
    private $ignore_lf_token = 0;
    // NOTE(review): presumably true while parsing a fragment rather than a
    // full document - confirm.
    private $fragment = false;
    // NOTE(review): usage not visible here - confirm.
    private $root;
    // Tag names per element category (presumably matching the SCOPING,
    // FORMATTING and SPECIAL element type constants - confirm).
    private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
    private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
    // dl and ds are speculative
    private $special = array('address','area','article','aside','base','basefont','bgsound',
    'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
    'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
    'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
    'listing','menu','meta','nav','noembed','noframes','noscript','ol',
    'p','param','plaintext','pre','script','select','spacer','style',
    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
    // NOTE(review): presumably buffers character tokens collected in the
    // IN_TABLE_TEXT mode, plus a flag describing that buffer - confirm.
    private $pendingTableCharacters;
    private $pendingTableCharactersDirty;
    // Tree construction modes
    // These are the values held by $mode and dispatched on in emitToken().
    // Note that 0, 1 and 2 are also used by the SCOPE* constants of this
    // class, so an integer alone does not identify a mode.
    const INITIAL           = 0;
    const BEFORE_HTML       = 1;
    const BEFORE_HEAD       = 2;
    const IN_HEAD           = 3;
    const IN_HEAD_NOSCRIPT  = 4;
    const AFTER_HEAD        = 5;
    const IN_BODY           = 6;
    const IN_CDATA_RCDATA   = 7;
    const IN_TABLE          = 8;
    const IN_TABLE_TEXT     = 9;
    const IN_CAPTION        = 10;
    const IN_COLUMN_GROUP   = 11;
    const IN_TABLE_BODY     = 12;
    const IN_ROW            = 13;
    const IN_CELL           = 14;
    const IN_SELECT         = 15;
    const IN_SELECT_IN_TABLE= 16;
    const IN_FOREIGN_CONTENT= 17;
    const AFTER_BODY        = 18;
    const IN_FRAMESET       = 19;
    const AFTER_FRAMESET    = 20;
    const AFTER_AFTER_BODY  = 21;
    const AFTER_AFTER_FRAMESET = 22;
    /**
     * Converts a magic number to a readable name. Use for debugging.
     *
     * Several constant groups of this class share integer values (INITIAL
     * and SCOPE are both 0, for example). The first constant declared for a
     * value is the one reported, so the insertion modes, which are declared
     * first, keep their names.
     *
     * @param int $number Value of one of this class's integer constants.
     * @return string|null Name of the constant, or null when no integer
     *                     constant has that value.
     */
    private function strConst($number) {
        static $lookup;
        if (!$lookup) {
            $lookup = array();
            $r = new ReflectionClass('HTML5_TreeBuilder');
            foreach ($r->getConstants() as $const => $num) {
                // String/null constants (the namespaces) are not lookup keys.
                if (!is_int($num)) continue;
                // Keep the earliest declaration; later constants that reuse
                // the value must not overwrite the insertion mode names.
                if (isset($lookup[$num])) continue;
                $lookup[$num] = $const;
            }
        }
        // Avoid an undefined-offset notice for unknown values.
        return isset($lookup[$number]) ? $lookup[$number] : null;
    }
    // The different types of elements.
    // NOTE(review): presumably these correspond to the $special, $scoping
    // and $formatting tag lists, with PHRASING as the remainder - confirm.
    const SPECIAL    = 100;
    const SCOPING    = 101;
    const FORMATTING = 102;
    const PHRASING   = 103;
    // Quirks modes in $quirks_mode
    const NO_QUIRKS             = 200;
    const QUIRKS_MODE           = 201;
    const LIMITED_QUIRKS_MODE   = 202;
    // Marker to be placed in $a_formatting
    const MARKER     = 300;
    // Namespaces for foreign content
    const NS_HTML   = null; // to prevent DOM from requiring NS on everything
    const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
    const NS_SVG    = 'http://www.w3.org/2000/svg';
    const NS_XLINK  = 'http://www.w3.org/1999/xlink';
    const NS_XML    = 'http://www.w3.org/XML/1998/namespace';
    const NS_XMLNS  = 'http://www.w3.org/2000/xmlns/';
    // Different types of scopes to test for elements
    // These reuse the integers 0-2, which are also the values of the
    // INITIAL, BEFORE_HTML and BEFORE_HEAD insertion modes.
    const SCOPE = 0;
    const SCOPE_LISTITEM = 1;
    const SCOPE_TABLE = 2;
    /**
     * Creates the tree builder in the initial insertion mode with an empty
     * UTF-8 DOM document to build into.
     */
    public function __construct() {
        $document = new DOMDocument();
        $document->encoding = 'UTF-8';
        $document->preserveWhiteSpace = true;
        $document->substituteEntities = true;
        $document->strictErrorChecking = false;

        $this->dom = $document;
        $this->mode = self::INITIAL;
    }
    // Process tag tokens
    public function emitToken($token, $mode = null) {
        // XXX: ignore parse errors... why are we emitting them, again?
        if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
        if ($mode === null) $mode = $this->mode;
        /*
        $backtrace = debug_backtrace();
        if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
        echo $this->strConst($mode);
        if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
        echo "\n  ";
        token_dump($token);
        $this->printStack();
        $this->printActiveFormattingElements();
        if ($this->foster_parent) echo "  -> this is a foster parent mode\n";
        if ($this->flag_frameset_ok) echo "  -> frameset ok\n";
        */
        if ($this->ignore_lf_token) $this->ignore_lf_token--;
        $this->ignored = false;
        // indenting is a little wonky, this can be changed later on
        switch ($mode) {
    case self::INITIAL:
        /* A character token that is one of U+0009 CHARACTER TABULATION,
         * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020 SPACE */
        if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Ignore the token. */
            $this->ignored = true;
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            if (
                $token['name'] !== 'html' || !empty($token['public']) ||
                !empty($token['system']) || $token !== 'about:legacy-compat'
            ) {
                /* If the DOCTYPE token's name is not a case-sensitive match
                 * for the string "html", or if the token's public identifier
                 * is not missing, or if the token's system identifier is
                 * neither missing nor a case-sensitive match for the string
                 * "about:legacy-compat", then there is a parse error (this
                 * is the DOCTYPE parse error). */
                // DOCTYPE parse error
            }
            /* Append a DocumentType node to the Document node, with the name
             * attribute set to the name given in the DOCTYPE token, or the
             * empty string if the name was missing; the publicId attribute
             * set to the public identifier given in the DOCTYPE token, or
             * the empty string if the public identifier was missing; the
             * systemId attribute set to the system identifier given in the
             * DOCTYPE token, or the empty string if the system identifier
             * was missing; and the other attributes specific to
             * DocumentType objects set to null and empty lists as
             * appropriate. Associate the DocumentType node with the
             * Document object so that it is returned as the value of the
             * doctype attribute of the Document object. */
            if (!isset($token['public'])) $token['public'] = null;
            if (!isset($token['system'])) $token['system'] = null;
            // XDOM
            // Yes this is hacky. I'm kind of annoyed that I can't appendChild
            // a doctype to DOMDocument. Maybe I haven't chanted the right
            // syllables.
            $impl = new DOMImplementation();
            // This call can fail for particularly pathological cases (namely,
            // the qualifiedName parameter ($token['name']) could be missing.
            if ($token['name']) {
                $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
                $this->dom->appendChild($doctype);
            } else {
                // It looks like libxml's not actually *able* to express this case.
                // So... don't.
                $this->dom->emptyDoctype = true;
            }
            $public = is_null($token['public']) ? false : strtolower($token['public']);
            $system = is_null($token['system']) ? false : strtolower($token['system']);
            $publicStartsWithForQuirks = array(
             "+//silmaril//dtd html pro v0r11 19970101//",
             "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
             "-//as//dtd html 3.0 aswedit + extensions//",
             "-//ietf//dtd html 2.0 level 1//",
             "-//ietf//dtd html 2.0 level 2//",
             "-//ietf//dtd html 2.0 strict level 1//",
             "-//ietf//dtd html 2.0 strict level 2//",
             "-//ietf//dtd html 2.0 strict//",
             "-//ietf//dtd html 2.0//",
             "-//ietf//dtd html 2.1e//",
             "-//ietf//dtd html 3.0//",
             "-//ietf//dtd html 3.2 final//",
             "-//ietf//dtd html 3.2//",
             "-//ietf//dtd html 3//",
             "-//ietf//dtd html level 0//",
             "-//ietf//dtd html level 1//",
             "-//ietf//dtd html level 2//",
             "-//ietf//dtd html level 3//",
             "-//ietf//dtd html strict level 0//",
             "-//ietf//dtd html strict level 1//",
             "-//ietf//dtd html strict level 2//",
             "-//ietf//dtd html strict level 3//",
             "-//ietf//dtd html strict//",
             "-//ietf//dtd html//",
             "-//metrius//dtd metrius presentational//",
             "-//microsoft//dtd internet explorer 2.0 html strict//",
             "-//microsoft//dtd internet explorer 2.0 html//",
             "-//microsoft//dtd internet explorer 2.0 tables//",
             "-//microsoft//dtd internet explorer 3.0 html strict//",
             "-//microsoft//dtd internet explorer 3.0 html//",
             "-//microsoft//dtd internet explorer 3.0 tables//",
             "-//netscape comm. corp.//dtd html//",
             "-//netscape comm. corp.//dtd strict html//",
             "-//o'reilly and associates//dtd html 2.0//",
             "-//o'reilly and associates//dtd html extended 1.0//",
             "-//o'reilly and associates//dtd html extended relaxed 1.0//",
             "-//spyglass//dtd html 2.0 extended//",
             "-//sq//dtd html 2.0 hotmetal + extensions//",
             "-//sun microsystems corp.//dtd hotjava html//",
             "-//sun microsystems corp.//dtd hotjava strict html//",
             "-//w3c//dtd html 3 1995-03-24//",
             "-//w3c//dtd html 3.2 draft//",
             "-//w3c//dtd html 3.2 final//",
             "-//w3c//dtd html 3.2//",
             "-//w3c//dtd html 3.2s draft//",
             "-//w3c//dtd html 4.0 frameset//",
             "-//w3c//dtd html 4.0 transitional//",
             "-//w3c//dtd html experimental 19960712//",
             "-//w3c//dtd html experimental 970421//",
             "-//w3c//dtd w3 html//",
             "-//w3o//dtd w3 html 3.0//",
             "-//webtechs//dtd mozilla html 2.0//",
             "-//webtechs//dtd mozilla html//",
            );
            $publicSetToForQuirks = array(
             "-//w3o//dtd w3 html strict 3.0//",
             "-/w3c/dtd html 4.0 transitional/en",
             "html",
            );
            $publicStartsWithAndSystemForQuirks = array(
             "-//w3c//dtd html 4.01 frameset//",
             "-//w3c//dtd html 4.01 transitional//",
            );
            $publicStartsWithForLimitedQuirks = array(
             "-//w3c//dtd xhtml 1.0 frameset//",
             "-//w3c//dtd xhtml 1.0 transitional//",
            );
            $publicStartsWithAndSystemForLimitedQuirks = array(
             "-//w3c//dtd html 4.01 frameset//",
             "-//w3c//dtd html 4.01 transitional//",
            );
            // first, do easy checks
            if (
                !empty($token['force-quirks']) ||
                strtolower($token['name']) !== 'html'
            ) {
                $this->quirks_mode = self::QUIRKS_MODE;
            } else {
                do {
                    if ($system) {
                        foreach ($publicStartsWithAndSystemForQuirks as $x) {
                            if (strncmp($public, $x, strlen($x)) === 0) {
                                $this->quirks_mode = self::QUIRKS_MODE;
                                break;
                            }
                        }
                        if (!is_null($this->quirks_mode)) break;
                        foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
                            if (strncmp($public, $x, strlen($x)) === 0) {
                                $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
                                break;
                            }
                        }
                        if (!is_null($this->quirks_mode)) break;
                    }
                    foreach ($publicSetToForQuirks as $x) {
                        if ($public === $x) {
                            $this->quirks_mode = self::QUIRKS_MODE;
                            break;
                        }
                    }
                    if (!is_null($this->quirks_mode)) break;
                    foreach ($publicStartsWithForLimitedQuirks as $x) {
                        if (strncmp($public, $x, strlen($x)) === 0) {
                            $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
                        }
                    }
                    if (!is_null($this->quirks_mode)) break;
                    if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
                        $this->quirks_mode = self::QUIRKS_MODE;
                        break;
                    }
                    foreach ($publicStartsWithForQuirks as $x) {
                        if (strncmp($public, $x, strlen($x)) === 0) {
                            $this->quirks_mode = self::QUIRKS_MODE;
                            break;
                        }
                    }
                    if (is_null($this->quirks_mode)) {
                        $this->quirks_mode = self::NO_QUIRKS;
                    }
                } while (false);
            }
            $this->mode = self::BEFORE_HTML;
        } else {
            // parse error
            /* Switch the insertion mode to "before html", then reprocess the
             * current token. */
            $this->mode = self::BEFORE_HTML;
            $this->quirks_mode = self::QUIRKS_MODE;
            $this->emitToken($token);
        }
        break;
    case self::BEFORE_HTML:
        /* A DOCTYPE token */
        if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // Parse error. Ignore the token.
            $this->ignored = true;
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            // XDOM
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Ignore the token. */
            $this->ignored = true;
        /* A start tag whose tag name is "html" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
            /* Create an element for the token in the HTML namespace. Append it 
             * to the Document  object. Put this element in the stack of open 
             * elements. */
            // XDOM
            $html = $this->insertElement($token, false);
            $this->dom->appendChild($html);
            $this->stack[] = $html;
            $this->mode = self::BEFORE_HEAD;
        } else {
            /* Create an html element. Append it to the Document object. Put
             * this element in the stack of open elements. */
            // XDOM
            $html = $this->dom->createElementNS(self::NS_HTML, 'html');
            $this->dom->appendChild($html);
            $this->stack[] = $html;
            /* Switch the insertion mode to "before head", then reprocess the
             * current token. */
            $this->mode = self::BEFORE_HEAD;
            $this->emitToken($token);
        }
        break;
    case self::BEFORE_HEAD:
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Ignore the token. */
            $this->ignored = true;
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);
        /* A DOCTYPE token */
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            /* Parse error. Ignore the token */
            $this->ignored = true;
            // parse error
        /* A start tag token with the tag name "html" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            /* Process the token using the rules for the "in body"
             * insertion mode. */
            $this->processWithRulesFor($token, self::IN_BODY);
        /* A start tag token with the tag name "head" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
            /* Insert an HTML element for the token. */
            $element = $this->insertElement($token);
            /* Set the head element pointer to this new element node. */
            $this->head_pointer = $element;
            /* Change the insertion mode to "in head". */
            $this->mode = self::IN_HEAD;
        /* An end tag whose tag name is one of: "head", "body", "html", "br" */
        } elseif(
            $token['type'] === HTML5_Tokenizer::ENDTAG && (
                $token['name'] === 'head' || $token['name'] === 'body' ||
                $token['name'] === 'html' || $token['name'] === 'br'
        )) {
            /* Act as if a start tag token with the tag name "head" and no
             * attributes had been seen, then reprocess the current token. */
            $this->emitToken(array(
                'name' => 'head',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            ));
            $this->emitToken($token);
        /* Any other end tag */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        } else {
            /* Act as if a start tag token with the tag name "head" and no
             * attributes had been seen, then reprocess the current token.
             * Note: This will result in an empty head element being
             * generated, with the current token being reprocessed in the
             * "after head" insertion mode. */
            $this->emitToken(array(
                'name' => 'head',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            ));
            $this->emitToken($token);
        }
        break;
    case self::IN_HEAD:
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE. */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Insert the character into the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);
        /* A DOCTYPE token */
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
            // parse error
        /* A start tag whose tag name is "html" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* A start tag whose tag name is one of: "base", "command", "link" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        ($token['name'] === 'base' || $token['name'] === 'command' ||
        $token['name'] === 'link')) {
            /* Insert an HTML element for the token. Immediately pop the
             * current node off the stack of open elements. */
            $this->insertElement($token);
            array_pop($this->stack);
            // YYY: Acknowledge the token's self-closing flag, if it is set.
        /* A start tag whose tag name is "meta" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
            /* Insert an HTML element for the token. Immediately pop the
             * current node off the stack of open elements. */
            $this->insertElement($token);
            array_pop($this->stack);
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
            // XENCODING: If the element has a charset attribute, and its value is a
            // supported encoding, and the confidence is currently tentative,
            // then change the encoding to the encoding given by the value of
            // the charset attribute.
            //
            // Otherwise, if the element has a content attribute, and applying
            // the algorithm for extracting an encoding from a Content-Type to
            // its value returns a supported encoding encoding, and the
            // confidence is currently tentative, then change the encoding to
            // the encoding encoding.
        /* A start tag with the tag name "title" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
            $this->insertRCDATAElement($token);
        /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
         * A start tag whose tag name is one of: "noframes", "style" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
            // XSCRIPT: Scripting flag not respected
            $this->insertCDATAElement($token);
        // XSCRIPT: Scripting flag disable not implemented
        /* A start tag with the tag name "script" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
            /* 1. Create an element for the token in the HTML namespace. */
            $node = $this->insertElement($token, false);
            /* 2. Mark the element as being "parser-inserted" */
            // Uhhh... XSCRIPT
            /* 3. If the parser was originally created for the HTML
             * fragment parsing algorithm, then mark the script element as 
             * "already executed". (fragment case) */
            // ditto... XSCRIPT
            /* 4. Append the new element to the current node  and push it onto 
             * the stack of open elements.  */
            end($this->stack)->appendChild($node);
            $this->stack[] = $node;
            // I guess we could squash these together
            /* 6. Let the original insertion mode be the current insertion mode. */
            $this->original_mode = $this->mode;
            /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
            $this->mode = self::IN_CDATA_RCDATA;
            /* 5. Switch the tokeniser's content model flag to the CDATA state. */
            $this->content_model = HTML5_Tokenizer::CDATA;
        /* An end tag with the tag name "head" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
            /* Pop the current node (which will be the head element) off the stack of open elements. */
            array_pop($this->stack);
            /* Change the insertion mode to "after head". */
            $this->mode = self::AFTER_HEAD;
        // Slight logic inversion here to minimize duplication
        /* A start tag with the tag name "head". */
        /* An end tag whose tag name is not one of: "body", "html", "br" */
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
        $token['name'] !== 'body' && $token['name'] !== 'br')) {
            // Parse error. Ignore the token.
            $this->ignored = true;
        /* Anything else */
        } else {
            /* Act as if an end tag token with the tag name "head" had been
             * seen, and reprocess the current token. */
            $this->emitToken(array(
                'name' => 'head',
                'type' => HTML5_Tokenizer::ENDTAG
            ));
            /* Then, reprocess the current token. */
            $this->emitToken($token);
        }
        break;
    case self::IN_HEAD_NOSCRIPT:
        if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::IN_BODY);
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
            /* Pop the current node (which will be a noscript element) from the
             * stack of open elements; the new current node will be a head
             * element. */
            array_pop($this->stack);
            $this->mode = self::IN_HEAD;
        } elseif (
            ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
            ($token['type'] === HTML5_Tokenizer::COMMENT) ||
            ($token['type'] === HTML5_Tokenizer::STARTTAG && (
                $token['name'] === 'link' || $token['name'] === 'meta' ||
                $token['name'] === 'noframes' || $token['name'] === 'style'))) {
            $this->processWithRulesFor($token, self::IN_HEAD);
        // inverted logic
        } elseif (
            ($token['type'] === HTML5_Tokenizer::STARTTAG && (
                $token['name'] === 'head' || $token['name'] === 'noscript')) ||
            ($token['type'] === HTML5_Tokenizer::ENDTAG &&
                $token['name'] !== 'br')) {
            // parse error
        } else {
            // parse error
            $this->emitToken(array(
                'type' => HTML5_Tokenizer::ENDTAG,
                'name' => 'noscript',
            ));
            $this->emitToken($token);
        }
        break;
    case self::AFTER_HEAD:
        /* Handle the token as follows: */
        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* A start tag token with the tag name "body" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
            $this->insertElement($token);
            /* Set the frameset-ok flag to "not ok". */
            $this->flag_frameset_ok = false;
            /* Change the insertion mode to "in body". */
            $this->mode = self::IN_BODY;
        /* A start tag token with the tag name "frameset" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
            /* Insert a frameset element for the token. */
            $this->insertElement($token);
            /* Change the insertion mode to "in frameset". */
            $this->mode = self::IN_FRAMESET;
        /* A start tag token whose tag name is one of: "base", "link", "meta",
        "noframes", "script", "style", "title" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
        array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
            // parse error
            /* Push the node pointed to by the head element pointer onto the
             * stack of open elements. */
            $this->stack[] = $this->head_pointer;
            $this->processWithRulesFor($token, self::IN_HEAD);
            array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
        // inversion of specification
        } elseif(
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
        ($token['type'] === HTML5_Tokenizer::ENDTAG &&
            $token['name'] !== 'body' && $token['name'] !== 'html' &&
            $token['name'] !== 'br')) {
            // parse error
        /* Anything else */
        } else {
            $this->emitToken(array(
                'name' => 'body',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            ));
            $this->flag_frameset_ok = true;
            $this->emitToken($token);
        }
        break;
    case self::IN_BODY:
        /* Handle the token as follows: */
        switch($token['type']) {
            /* A character token */
            case HTML5_Tokenizer::CHARACTER:
            case HTML5_Tokenizer::SPACECHARACTER:
                /* Reconstruct the active formatting elements, if any. */
                $this->reconstructActiveFormattingElements();
                /* Append the token's character to the current node. */
                $this->insertText($token['data']);
                /* If the token is not one of U+0009 CHARACTER TABULATION,
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020
                 * SPACE, then set the frameset-ok flag to "not ok". */
                // i.e., if any of the characters is not whitespace
                if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
                    $this->flag_frameset_ok = false;
                }
            break;
            /* A comment token */
            case HTML5_Tokenizer::COMMENT:
                /* Append a Comment node to the current node with the data
                attribute set to the data given in the comment token. */
                $this->insertComment($token['data']);
            break;
            case HTML5_Tokenizer::DOCTYPE:
                // parse error
            break;
            case HTML5_Tokenizer::EOF:
                // parse error
            break;
            case HTML5_Tokenizer::STARTTAG:
            switch($token['name']) {
                case 'html':
                    // parse error
                    /* For each attribute on the token, check to see if the
                     * attribute is already present on the top element of the
                     * stack of open elements. If it is not, add the attribute
                     * and its corresponding value to that element. */
                    foreach($token['attr'] as $attr) {
                        if(!$this->stack[0]->hasAttribute($attr['name'])) {
                            $this->stack[0]->setAttribute($attr['name'], $attr['value']);
                        }
                    }
                break;
                case 'base': case 'command': case 'link': case 'meta': case 'noframes':
                case 'script': case 'style': case 'title':
                    /* Process the token as if the insertion mode had been "in
                    head". */
                    $this->processWithRulesFor($token, self::IN_HEAD);
                break;
                /* A start tag token with the tag name "body" */
                case 'body':
                    /* Parse error. If the second element on the stack of open
                    elements is not a body element, or, if the stack of open
                    elements has only one node on it, then ignore the token.
                    (fragment case) */
                    if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
                        $this->ignored = true;
                        // Ignore
                    /* Otherwise, for each attribute on the token, check to see
                    if the attribute is already present on the body element (the
                    second element)    on the stack of open elements. If it is not,
                    add the attribute and its corresponding value to that
                    element. */
                    } else {
                        foreach($token['attr'] as $attr) {
                            if(!$this->stack[1]->hasAttribute($attr['name'])) {
                                $this->stack[1]->setAttribute($attr['name'], $attr['value']);
                            }
                        }
                    }
                break;
                case 'frameset':
                    // parse error
                    /* If the second element on the stack of open elements is
                     * not a body element, or, if the stack of open elements
                     * has only one node on it, then ignore the token.
                     * (fragment case) */
                    if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
                        $this->ignored = true;
                        // Ignore
                    } elseif (!$this->flag_frameset_ok) {
                        $this->ignored = true;
                        // Ignore
                    } else {
                        /* 1. Remove the second element on the stack of open 
                         * elements from its parent node, if it has one.  */
                        if($this->stack[1]->parentNode) {
                            $this->stack[1]->parentNode->removeChild($this->stack[1]);
                        }
                        /* 2. Pop all the nodes from the bottom of the stack of 
                         * open elements, from the current node up to the root 
                         * html element. */
                        array_splice($this->stack, 1);
                        $this->insertElement($token);
                        $this->mode = self::IN_FRAMESET;
                    }
                break;
                // in spec, there is a diversion here
                case 'address': case 'article': case 'aside': case 'blockquote':
                case 'center': case 'datagrid': case 'details': case 'dir':
                case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
                case 'header': case 'hgroup': case 'menu': case 'nav':
                case 'ol': case 'p': case 'section': case 'ul':
                    /* If the stack of open elements has a p element in scope,
                    then act as if an end tag with the tag name p had been
                    seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                break;
                /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
                "h5", "h6" */
                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
                    /* If the stack of open elements has a p  element in scope,
                    then act as if an end tag with the tag name p had been seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* If the current node is an element whose tag name is one
                     * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
                     * parse error; pop the current node off the stack of open
                     * elements. */
                    $peek = array_pop($this->stack);
                    if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
                        // parse error
                    } else {
                        $this->stack[] = $peek;
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                break;
                case 'pre': case 'listing':
                    /* If the stack of open elements has a p  element in scope,
                    then act as if an end tag with the tag name p had been seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    $this->insertElement($token);
                    /* If the next token is a U+000A LINE FEED (LF) character
                     * token, then ignore that token and move on to the next
                     * one. (Newlines at the start of pre blocks are ignored as
                     * an authoring convenience.) */
                    $this->ignore_lf_token = 2;
                    $this->flag_frameset_ok = false;
                break;
                /* A start tag whose tag name is "form" */
                case 'form':
                    /* If the form element pointer is not null, ignore the
                    token with a parse error. */
                    if($this->form_pointer !== null) {
                        $this->ignored = true;
                        // Ignore.
                    /* Otherwise: */
                    } else {
                        /* If the stack of open elements has a p element in
                        scope, then act as if an end tag with the tag name p
                        had been seen. */
                        if($this->elementInScope('p')) {
                            $this->emitToken(array(
                                'name' => 'p',
                                'type' => HTML5_Tokenizer::ENDTAG
                            ));
                        }
                        /* Insert an HTML element for the token, and set the
                        form element pointer to point to the element created. */
                        $element = $this->insertElement($token);
                        $this->form_pointer = $element;
                    }
                break;
                // condensed specification
                case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
                    /* 1. Set the frameset-ok flag to "not ok". */
                    $this->flag_frameset_ok = false;
                    $stack_length = count($this->stack) - 1;
                    for($n = $stack_length; 0 <= $n; $n--) {
                        /* 2. Initialise node to be the current node (the
                        bottommost node of the stack). */
                        $stop = false;
                        $node = $this->stack[$n];
                        $cat  = $this->getElementCategory($node);
                        // for case 'li':
                        /* 3. If node is an li element, then act as if an end
                         * tag with the tag name "li" had been seen, then jump
                         * to the last step.  */
                        // for case 'dc': case 'dd': case 'ds': case 'dt':
                        /* If node is a dc, dd, ds or dt element, then act as if an end
                         * tag with the same tag name as node had been seen, then
                         * jump to the last step. */
                        if(($token['name'] === 'li' && $node->tagName === 'li') ||
                        ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
                            $this->emitToken(array(
                                'type' => HTML5_Tokenizer::ENDTAG,
                                'name' => $node->tagName,
                            ));
                            break;
                        }
                        /* 4. If node is not in the formatting category, and is
                        not    in the phrasing category, and is not an address,
                        div or p element, then stop this algorithm. */
                        if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
                        $node->tagName !== 'address' && $node->tagName !== 'div' &&
                        $node->tagName !== 'p') {
                            break;
                        }
                        /* 5. Otherwise, set node to the previous entry in the
                         * stack of open elements and return to step 2. */
                    }
                    /* 6. This is the last step. */
                    /* If the stack of open elements has a p  element in scope,
                    then act as if an end tag with the tag name p had been
                    seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Finally, insert an HTML element with the same tag
                    name as the    token's. */
                    $this->insertElement($token);
                break;
                /* A start tag token whose tag name is "plaintext" */
                case 'plaintext':
                    /* If the stack of open elements has a p  element in scope,
                    then act as if an end tag with the tag name p had been
                    seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    $this->content_model = HTML5_Tokenizer::PLAINTEXT;
                break;
                // more diversions
                /* A start tag whose tag name is "a" */
                case 'a':
                    /* If the list of active formatting elements contains
                    an element whose tag name is "a" between the end of the
                    list and the last marker on the list (or the start of
                    the list if there is no marker on the list), then this
                    is a parse error; act as if an end tag with the tag name
                    "a" had been seen, then remove that element from the list
                    of active formatting elements and the stack of open
                    elements if the end tag didn't already remove it (it
                    might not have if the element is not in table scope). */
                    $leng = count($this->a_formatting);
                    for($n = $leng - 1; $n >= 0; $n--) {
                        if($this->a_formatting[$n] === self::MARKER) {
                            break;
                        } elseif($this->a_formatting[$n]->tagName === 'a') {
                            $a = $this->a_formatting[$n];
                            $this->emitToken(array(
                                'name' => 'a',
                                'type' => HTML5_Tokenizer::ENDTAG
                            ));
                            if (in_array($a, $this->a_formatting)) {
                                $a_i = array_search($a, $this->a_formatting, true);
                                if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
                            }
                            if (in_array($a, $this->stack)) {
                                $a_i = array_search($a, $this->stack, true);
                                if ($a_i !== false) array_splice($this->stack, $a_i, 1);
                            }
                            break;
                        }
                    }
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $el = $this->insertElement($token);
                    /* Add that element to the list of active formatting
                    elements. */
                    $this->a_formatting[] = $el;
                break;
                case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
                case 's': case 'small': case 'strike':
                case 'strong': case 'tt': case 'u':
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $el = $this->insertElement($token);
                    /* Add that element to the list of active formatting
                    elements. */
                    $this->a_formatting[] = $el;
                break;
                case 'nobr':
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* If the stack of open elements has a nobr element in
                     * scope, then this is a parse error; act as if an end tag
                     * with the tag name "nobr" had been seen, then once again
                     * reconstruct the active formatting elements, if any. */
                    if ($this->elementInScope('nobr')) {
                        $this->emitToken(array(
                            'name' => 'nobr',
                            'type' => HTML5_Tokenizer::ENDTAG,
                        ));
                        $this->reconstructActiveFormattingElements();
                    }
                    /* Insert an HTML element for the token. */
                    $el = $this->insertElement($token);
                    /* Add that element to the list of active formatting
                    elements. */
                    $this->a_formatting[] = $el;
                break;
                // another diversion
                /* A start tag token whose tag name is "button" */
                case 'button':
                    /* If the stack of open elements has a button element in scope,
                    then this is a parse error; act as if an end tag with the tag
                    name "button" had been seen, then reprocess the token. (We don't
                    do that. Unnecessary.) (I hope you're right! -- ezyang) */
                    if($this->elementInScope('button')) {
                        $this->emitToken(array(
                            'name' => 'button',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    /* Insert a marker at the end of the list of active
                    formatting elements. */
                    $this->a_formatting[] = self::MARKER;
                    $this->flag_frameset_ok = false;
                break;
                case 'applet': case 'marquee': case 'object':
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    /* Insert a marker at the end of the list of active
                    formatting elements. */
                    $this->a_formatting[] = self::MARKER;
                    $this->flag_frameset_ok = false;
                break;
                // spec diversion
                /* A start tag whose tag name is "table" */
                case 'table':
                    /* If the Document is not set to quirks mode, and the 
                     * stack of open elements has a p element in scope, then 
                     * act as if an end tag with the tag name "p" had been 
                     * seen. */
                    if($this->quirks_mode !== self::QUIRKS_MODE &&
                    $this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    $this->flag_frameset_ok = false;
                    /* Change the insertion mode to "in table". */
                    $this->mode = self::IN_TABLE;
                break;
                /* A start tag whose tag name is one of: "area", "basefont",
                "bgsound", "br", "embed", "img", "input", "keygen", "spacer",
                "wbr" */
                case 'area': case 'basefont': case 'bgsound': case 'br':
                case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
                case 'wbr':
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    /* Immediately pop the current node off the stack of open elements. */
                    array_pop($this->stack);
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
                    $this->flag_frameset_ok = false;
                break;
                case 'param': case 'source':
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    /* Immediately pop the current node off the stack of open elements. */
                    array_pop($this->stack);
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
                break;
                /* A start tag whose tag name is "hr" */
                case 'hr':
                    /* If the stack of open elements has a p element in scope,
                    then act as if an end tag with the tag name p had been seen. */
                    if($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    /* Immediately pop the current node off the stack of open elements. */
                    array_pop($this->stack);
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
                    $this->flag_frameset_ok = false;
                break;
                /* A start tag whose tag name is "image" */
                case 'image':
                    /* Parse error. Change the token's tag name to "img" and
                    reprocess it. (Don't ask.) */
                    $token['name'] = 'img';
                    $this->emitToken($token);
                break;
                /* A start tag whose tag name is "isindex" */
                case 'isindex':
                    /* Parse error. */
                    /* If the form element pointer is not null,
                    then ignore the token. */
                    if($this->form_pointer === null) {
                        /* Act as if a start tag token with the tag name "form" had
                        been seen. */
                        /* If the token has an attribute called "action", set
                         * the action attribute on the resulting form
                         * element to the value of the "action" attribute of
                         * the token. */
                        $attr = array();
                        $action = $this->getAttr($token, 'action');
                        if ($action !== false) {
                            $attr[] = array('name' => 'action', 'value' => $action);
                        }
                        $this->emitToken(array(
                            'name' => 'form',
                            'type' => HTML5_Tokenizer::STARTTAG,
                            'attr' => $attr
                        ));
                        /* Act as if a start tag token with the tag name "hr" had
                        been seen. */
                        $this->emitToken(array(
                            'name' => 'hr',
                            'type' => HTML5_Tokenizer::STARTTAG,
                            'attr' => array()
                        ));
                        /* Act as if a start tag token with the tag name "label"
                        had been seen. */
                        $this->emitToken(array(
                            'name' => 'label',
                            'type' => HTML5_Tokenizer::STARTTAG,
                            'attr' => array()
                        ));
                        /* Act as if a stream of character tokens had been seen. */
                        $prompt = $this->getAttr($token, 'prompt');
                        if ($prompt === false) {
                            $prompt = 'This is a searchable index. '.
                            'Insert your search keywords here: ';
                        }
                        $this->emitToken(array(
                            'data' => $prompt,
                            'type' => HTML5_Tokenizer::CHARACTER,
                        ));
                        /* Act as if a start tag token with the tag name "input"
                        had been seen, with all the attributes from the "isindex"
                        token, except with the "name" attribute set to the value
                        "isindex" (ignoring any explicit "name" attribute). */
                        $attr = array();
                        foreach ($token['attr'] as $keypair) {
                            if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
                                $keypair['name'] === 'prompt') continue;
                            $attr[] = $keypair;
                        }
                        $attr[] = array('name' => 'name', 'value' => 'isindex');
                        $this->emitToken(array(
                            'name' => 'input',
                            'type' => HTML5_Tokenizer::STARTTAG,
                            'attr' => $attr
                        ));
                        /* Act as if an end tag token with the tag name "label"
                        had been seen. */
                        $this->emitToken(array(
                            'name' => 'label',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                        /* Act as if a start tag token with the tag name "hr" had
                        been seen. */
                        $this->emitToken(array(
                            'name' => 'hr',
                            'type' => HTML5_Tokenizer::STARTTAG
                        ));
                        /* Act as if an end tag token with the tag name "form" had
                        been seen. */
                        $this->emitToken(array(
                            'name' => 'form',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    } else {
                        $this->ignored = true;
                    }
                break;
                /* A start tag whose tag name is "textarea" */
                case 'textarea':
                    $this->insertElement($token);
                    /* If the next token is a U+000A LINE FEED (LF)
                     * character token, then ignore that token and move on to
                     * the next one. (Newlines at the start of textarea
                     * elements are ignored as an authoring convenience.)
                     * need flag, see also <pre> */
                    $this->ignore_lf_token = 2;
                    $this->original_mode = $this->mode;
                    $this->flag_frameset_ok = false;
                    $this->mode = self::IN_CDATA_RCDATA;
                    /* Switch the tokeniser's content model flag to the
                    RCDATA state. */
                    $this->content_model = HTML5_Tokenizer::RCDATA;
                break;
                /* A start tag token whose tag name is "xmp" */
                case 'xmp':
                    /* If the stack of open elements has a p element in
                    scope, then act as if an end tag with the tag name
                    "p" has been seen. */
                    if ($this->elementInScope('p')) {
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::ENDTAG
                        ));
                    }
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    $this->flag_frameset_ok = false;
                    $this->insertCDATAElement($token);
                break;
                case 'iframe':
                    $this->flag_frameset_ok = false;
                    $this->insertCDATAElement($token);
                break;
                case 'noembed': case 'noscript':
                    // XSCRIPT: should check scripting flag
                    $this->insertCDATAElement($token);
                break;
                /* A start tag whose tag name is "select" */
                case 'select':
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                    $this->flag_frameset_ok = false;
                    /* If the insertion mode is one of "in table", "in caption",
                     * "in column group", "in table body", "in row", or "in
                     * cell", then switch the insertion mode to "in select in
                     * table". Otherwise, switch the insertion mode to "in
                     * select". */
                    // All comparisons are strict (===) so the mode constant is
                    // matched by identity, consistently across every branch.
                    if (
                        $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
                        $this->mode === self::IN_COLUMN_GROUP || $this->mode === self::IN_TABLE_BODY ||
                        $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
                    ) {
                        $this->mode = self::IN_SELECT_IN_TABLE;
                    } else {
                        $this->mode = self::IN_SELECT;
                    }
                break;
                case 'option': case 'optgroup':
                    if ($this->elementInScope('option')) {
                        $this->emitToken(array(
                            'name' => 'option',
                            'type' => HTML5_Tokenizer::ENDTAG,
                        ));
                    }
                    $this->reconstructActiveFormattingElements();
                    $this->insertElement($token);
                break;
                case 'rp': case 'rt':
                    /* If the stack of open elements has a ruby element in scope, then generate
                     * implied end tags. If the current node is not then a ruby element, this is
                     * a parse error; pop all the nodes from the current node up to the node
                     * immediately before the bottommost ruby element on the stack of open elements.
                     */
                    if ($this->elementInScope('ruby')) {
                        $this->generateImpliedEndTags();
                        // Only pop when a ruby element is known to be in scope;
                        // popping unconditionally would drain the whole stack
                        // and never terminate when no ruby element is open.
                        while ($this->stack && end($this->stack)->tagName !== 'ruby') {
                            // parse error
                            array_pop($this->stack);
                        }
                    }
                    /* Insert an HTML element for the token. */
                    $this->insertElement($token);
                break;
                // spec diversion
                case 'math':
                    $this->reconstructActiveFormattingElements();
                    $token = $this->adjustMathMLAttributes($token);
                    $token = $this->adjustForeignAttributes($token);
                    $this->insertForeignElement($token, self::NS_MATHML);
                    if (isset($token['self-closing'])) {
                        // XERROR: acknowledge the token's self-closing flag
                        array_pop($this->stack);
                    }
                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
                        $this->secondary_mode = $this->mode;
                        $this->mode = self::IN_FOREIGN_CONTENT;
                    }
                break;
                case 'svg':
                    $this->reconstructActiveFormattingElements();
                    $token = $this->adjustSVGAttributes($token);
                    $token = $this->adjustForeignAttributes($token);
                    $this->insertForeignElement($token, self::NS_SVG);
                    if (isset($token['self-closing'])) {
                        // XERROR: acknowledge the token's self-closing flag
                        array_pop($this->stack);
                    }
                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
                        $this->secondary_mode = $this->mode;
                        $this->mode = self::IN_FOREIGN_CONTENT;
                    }
                break;
                case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
                case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
                    // parse error
                break;
                /* A start tag token not covered by the previous entries */
                default:
                    /* Reconstruct the active formatting elements, if any. */
                    $this->reconstructActiveFormattingElements();
                    $this->insertElement($token);
                    /* This element will be a phrasing  element. */
                break;
            }
            break;
            case HTML5_Tokenizer::ENDTAG:
            switch($token['name']) {
                /* An end tag with the tag name "body" */
                case 'body':
                    /* If the stack of open elements does not have a body 
                     * element in scope, this is a parse error; ignore the 
                     * token. */
                    if(!$this->elementInScope('body')) {
                        $this->ignored = true;
                        // An ignored token must not change the insertion
                        // mode, so leave the switch here.
                        break;
                    }
                    /* Otherwise, if there is a node in the stack of open 
                     * elements that is not either a dc element, a dd element, 
                     * a ds element, a dt element, an li element, an optgroup 
                     * element, an option element, a p element, an rp element, 
                     * an rt element, a tbody element, a td element, a tfoot 
                     * element, a th element, a thead element, a tr element, 
                     * the body element, or the html element, then this is a 
                     * parse error.
                     */
                    // XERROR: implement this check for parse error
                    /* Change the insertion mode to "after body". */
                    $this->mode = self::AFTER_BODY;
                break;
                /* An end tag with the tag name "html" */
                case 'html':
                    /* Act as if an end tag with tag name "body" had been seen,
                    then, if that token wasn't ignored, reprocess the current
                    token. */
                    $this->emitToken(array(
                        'name' => 'body',
                        'type' => HTML5_Tokenizer::ENDTAG
                    ));
                    if (!$this->ignored) $this->emitToken($token);
                break;
                case 'address': case 'article': case 'aside': case 'blockquote':
                case 'center': case 'datagrid': case 'details': case 'dir':
                case 'div': case 'dl': case 'fieldset': case 'footer':
                case 'header': case 'hgroup': case 'listing': case 'menu':
                case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
                    /* If the stack of open elements has an element in scope
                    with the same tag name as that of the token, then generate
                    implied end tags. */
                    if($this->elementInScope($token['name'])) {
                        $this->generateImpliedEndTags();
                        /* Now, if the current node is not an element with
                        the same tag name as that of the token, then this
                        is a parse error. */
                        // XERROR: implement parse error logic
                        /* If the stack of open elements has an element in
                        scope with the same tag name as that of the token,
                        then pop elements from this stack until an element
                        with that tag name has been popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while ($node->tagName !== $token['name']);
                    } else {
                        // parse error
                    }
                break;
                /* An end tag whose tag name is "form" */
                case 'form':
                    /* Let node be the element that the form element pointer is set to. */
                    $node = $this->form_pointer;
                    /* Set the form element pointer  to null. */
                    $this->form_pointer = null;
                    /* If node is null or the stack of open elements does not 
                        * have node in scope, then this is a parse error; ignore the token. */
                    // Membership is tested by identity (strict) so it agrees
                    // with the strict array_search() below. A loose match on a
                    // different element would make that search return false,
                    // and array_splice() would then remove index 0 of the stack.
                    if ($node === null || !in_array($node, $this->stack, true)) {
                        // parse error
                        $this->ignored = true;
                    } else {
                        /* 1. Generate implied end tags. */
                        $this->generateImpliedEndTags();
                        /* 2. If the current node is not node, then this is a parse error.  */
                        if (end($this->stack) !== $node) {
                            // parse error
                        }
                        /* 3. Remove node from the stack of open elements. */
                        $node_pos = array_search($node, $this->stack, true);
                        if ($node_pos !== false) {
                            array_splice($this->stack, $node_pos, 1);
                        }
                    }
                break;
                /* An end tag whose tag name is "p" */
                case 'p':
                    /* If the stack of open elements has a p element in scope,
                    then generate implied end tags, except for p elements. */
                    if($this->elementInScope('p')) {
                        /* Generate implied end tags, except for elements with
                         * the same tag name as the token. */
                        $this->generateImpliedEndTags(array('p'));
                        /* If the current node is not a p element, then this is
                        a parse error. */
                        // XERROR: implement
                        /* Pop elements from the stack of open elements  until
                         * an element with the same tag name as the token has
                         * been popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while ($node->tagName !== 'p');
                    } else {
                        // parse error
                        $this->emitToken(array(
                            'name' => 'p',
                            'type' => HTML5_Tokenizer::STARTTAG,
                        ));
                        $this->emitToken($token);
                    }
                break;
                /* An end tag whose tag name is "li" */
                case 'li':
                    /* If the stack of open elements does not have an element
                     * in list item scope with the same tag name as that of the
                     * token, then this is a parse error; ignore the token. */
                    if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
                        /* Generate implied end tags, except for elements with the
                         * same tag name as the token. */
                        $this->generateImpliedEndTags(array($token['name']));
                        /* If the current node is not an element with the same tag
                         * name as that of the token, then this is a parse error. */
                        // XERROR: parse error
                        /* Pop elements from the stack of open elements  until an
                         * element with the same tag name as the token has been
                         * popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while ($node->tagName !== $token['name']);
                    } else {
                        // XERROR: parse error
                    }
                break;
                /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
                case 'dc': case 'dd': case 'ds': case 'dt':
                    if($this->elementInScope($token['name'])) {
                        $this->generateImpliedEndTags(array($token['name']));
                        /* If the current node is not an element with the same
                        tag name as the token, then this is a parse error. */
                        // XERROR: implement parse error
                        /* Pop elements from the stack of open elements  until
                         * an element with the same tag name as the token has
                         * been popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while ($node->tagName !== $token['name']);
                    } else {
                        // XERROR: parse error
                    }
                break;
                /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
                "h5", "h6" */
                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
                    $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
                    /* If the stack of open elements has in scope an element whose
                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
                    generate implied end tags. */
                    if($this->elementInScope($elements)) {
                        $this->generateImpliedEndTags();
                        /* Now, if the current node is not an element with the same
                        tag name as that of the token, then this is a parse error. */
                        // XERROR: implement parse error
                        /* If the stack of open elements has in scope an element
                        whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
                        "h6", then pop elements from the stack until an element
                        with one of those tag names has been popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while (!in_array($node->tagName, $elements));
                    } else {
                        // parse error
                    }
                break;
                /* An end tag whose tag name is one of: "a", "b", "big", "code",
                "em", "font", "i", "nobr", "s", "small", "strike", "strong",
                "tt", "u" */
                case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
                case 'i': case 'nobr': case 's': case 'small': case 'strike':
                case 'strong': case 'tt': case 'u':
                    // XERROR: generally speaking this needs parse error logic
                    /* 1. Let the formatting element be the last element in
                    the list of active formatting elements that:
                        * is between the end of the list and the last scope
                        marker in the list, if any, or the start of the list
                        otherwise, and
                        * has the same tag name as the token.
                    */
                    while(true) {
                        for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
                            if($this->a_formatting[$a] === self::MARKER) {
                                break;
                            } elseif($this->a_formatting[$a]->tagName === $token['name']) {
                                $formatting_element = $this->a_formatting[$a];
                                $in_stack = in_array($formatting_element, $this->stack, true);
                                $fe_af_pos = $a;
                                break;
                            }
                        }
                        /* If there is no such node, or, if that node is
                        also in the stack of open elements but the element
                        is not in scope, then this is a parse error. Abort
                        these steps. The token is ignored. */
                        if(!isset($formatting_element) || ($in_stack &&
                        !$this->elementInScope($token['name']))) {
                            $this->ignored = true;
                            break;
                        /* Otherwise, if there is such a node, but that node
                        is not in the stack of open elements, then this is a
                        parse error; remove the element from the list, and
                        abort these steps. */
                        } elseif(isset($formatting_element) && !$in_stack) {
                            unset($this->a_formatting[$fe_af_pos]);
                            $this->a_formatting = array_merge($this->a_formatting);
                            break;
                        }
                        /* Otherwise, there is a formatting element and that
                         * element is in the stack and is in scope. If the
                         * element is not the current node, this is a parse
                         * error. In any case, proceed with the algorithm as
                         * written in the following steps. */
                        // XERROR: implement me
                        /* 2. Let the furthest block be the topmost node in the
                        stack of open elements that is lower in the stack
                        than the formatting element, and is not an element in
                        the phrasing or formatting categories. There might
                        not be one. */
                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
                        $length = count($this->stack);
                        for($s = $fe_s_pos + 1; $s < $length; $s++) {
                            $category = $this->getElementCategory($this->stack[$s]);
                            if($category !== self::PHRASING && $category !== self::FORMATTING) {
                                $furthest_block = $this->stack[$s];
                                break;
                            }
                        }
                        /* 3. If there is no furthest block, then the UA must
                        skip the subsequent steps and instead just pop all
                        the nodes from the bottom of the stack of open
                        elements, from the current node up to the formatting
                        element, and remove the formatting element from the
                        list of active formatting elements. */
                        if(!isset($furthest_block)) {
                            for($n = $length - 1; $n >= $fe_s_pos; $n--) {
                                array_pop($this->stack);
                            }
                            unset($this->a_formatting[$fe_af_pos]);
                            $this->a_formatting = array_merge($this->a_formatting);
                            break;
                        }
                        /* 4. Let the common ancestor be the element
                        immediately above the formatting element in the stack
                        of open elements. */
                        $common_ancestor = $this->stack[$fe_s_pos - 1];
                        /* 5. Let a bookmark note the position of the
                        formatting element in the list of active formatting
                        elements relative to the elements on either side
                        of it in the list. */
                        $bookmark = $fe_af_pos;
                        /* 6. Let node and last node  be the furthest block.
                        Follow these steps: */
                        $node = $furthest_block;
                        $last_node = $furthest_block;
                        while(true) {
                            for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
                                /* 6.1 Let node be the element immediately
                                prior to node in the stack of open elements. */
                                $node = $this->stack[$n];
                                /* 6.2 If node is not in the list of active
                                formatting elements, then remove node from
                                the stack of open elements and then go back
                                to step 1. */
                                if(!in_array($node, $this->a_formatting, true)) {
                                    array_splice($this->stack, $n, 1);
                                } else {
                                    break;
                                }
                            }
                            /* 6.3 Otherwise, if node is the formatting
                            element, then go to the next step in the overall
                            algorithm. */
                            if($node === $formatting_element) {
                                break;
                            /* 6.4 Otherwise, if last node is the furthest
                            block, then move the aforementioned bookmark to
                            be immediately after the node in the list of
                            active formatting elements. */
                            } elseif($last_node === $furthest_block) {
                                $bookmark = array_search($node, $this->a_formatting, true) + 1;
                            }
                            /* 6.5 Create an element for the token for which
                             * the element node was created, replace the entry
                             * for node in the list of active formatting
                             * elements with an entry for the new element,
                             * replace the entry for node in the stack of open
                             * elements with an entry for the new element, and
                             * let node be the new element. */
                            // we don't know what the token is anymore
                            // XDOM
                            $clone = $node->cloneNode();
                            $a_pos = array_search($node, $this->a_formatting, true);
                            $s_pos = array_search($node, $this->stack, true);
                            $this->a_formatting[$a_pos] = $clone;
                            $this->stack[$s_pos] = $clone;
                            $node = $clone;
                            /* 6.6 Insert last node into node, first removing
                            it from its previous parent node if any. */
                            // XDOM
                            if($last_node->parentNode !== null) {
                                $last_node->parentNode->removeChild($last_node);
                            }
                            // XDOM
                            $node->appendChild($last_node);
                            /* 6.7 Let last node be node. */
                            $last_node = $node;
                            /* 6.8 Return to step 1 of this inner set of steps. */
                        }
                        /* 7. If the common ancestor node is a table, tbody,
                         * tfoot, thead, or tr element, then, foster parent
                         * whatever last node ended up being in the previous
                         * step, first removing it from its previous parent
                         * node if any. */
                        // XDOM
                        if ($last_node->parentNode) { // common step
                            $last_node->parentNode->removeChild($last_node);
                        }
                        if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
                            $this->fosterParent($last_node);
                        /* Otherwise, append whatever last node  ended up being
                         * in the previous step to the common ancestor node,
                         * first removing it from its previous parent node if
                         * any. */
                        } else {
                            // XDOM
                            $common_ancestor->appendChild($last_node);
                        }
                        /* 8. Create an element for the token for which the
                         * formatting element was created. */
                        // XDOM
                        $clone = $formatting_element->cloneNode();
                        /* 9. Take all of the child nodes of the furthest
                        block and append them to the element created in the
                        last step. */
                        // XDOM
                        while($furthest_block->hasChildNodes()) {
                            $child = $furthest_block->firstChild;
                            $furthest_block->removeChild($child);
                            $clone->appendChild($child);
                        }
                        /* 10. Append that clone to the furthest block. */
                        // XDOM
                        $furthest_block->appendChild($clone);
                        /* 11. Remove the formatting element from the list
                        of active formatting elements, and insert the new element
                        into the list of active formatting elements at the
                        position of the aforementioned bookmark. */
                        $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
                        array_splice($this->a_formatting, $fe_af_pos, 1);
                        $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
                        $af_part2 = array_slice($this->a_formatting, $bookmark);
                        $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
                        /* 12. Remove the formatting element from the stack
                        of open elements, and insert the new element into the stack
                        of open elements immediately below the position of the
                        furthest block in that stack. */
                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
                        array_splice($this->stack, $fe_s_pos, 1);
                        $fb_s_pos = array_search($furthest_block, $this->stack, true);
                        $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
                        $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
                        $this->stack = array_merge($s_part1, array($clone), $s_part2);
                        /* 13. Jump back to step 1 in this series of steps. */
                        unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
                    }
                break;
                case 'applet': case 'button': case 'marquee': case 'object':
                    /* If the stack of open elements has an element in scope whose
                    tag name matches the tag name of the token, then generate implied
                    tags. */
                    if($this->elementInScope($token['name'])) {
                        $this->generateImpliedEndTags();
                        /* Now, if the current node is not an element with the same
                        tag name as the token, then this is a parse error. */
                        // XERROR: implement logic
                        /* Pop elements from the stack of open elements  until
                         * an element with the same tag name as the token has
                         * been popped from the stack. */
                        do {
                            $node = array_pop($this->stack);
                        } while ($node->tagName !== $token['name']);
                        /* Clear the list of active formatting elements up to the
                         * last marker. */
                        $keys = array_keys($this->a_formatting, self::MARKER, true);
                        $marker = end($keys);
                        for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
                            array_pop($this->a_formatting);
                        }
                    } else {
                        // parse error
                    }
                break;
                case 'br':
                    // Parse error
                    $this->emitToken(array(
                        'name' => 'br',
                        'type' => HTML5_Tokenizer::STARTTAG,
                    ));
                break;
                /* An end tag token not covered by the previous entries */
                default:
                    for($n = count($this->stack) - 1; $n >= 0; $n--) {
                        /* Initialise node to be the current node (the bottommost
                        node of the stack). */
                        $node = $this->stack[$n];
                        /* If node has the same tag name as the end tag token,
                        then: */
                        if($token['name'] === $node->tagName) {
                            /* Generate implied end tags. */
                            $this->generateImpliedEndTags();
                            /* If the tag name of the end tag token does not
                            match the tag name of the current node, this is a
                            parse error. */
                            // XERROR: implement this
                            /* Pop all the nodes from the current node up to
                            node, including node, then stop these steps. */
                            // XSKETCHY
                            do {
                                $pop = array_pop($this->stack);
                            } while ($pop !== $node);
                            break;
                        } else {
                            $category = $this->getElementCategory($node);
                            if($category !== self::FORMATTING && $category !== self::PHRASING) {
                                /* Otherwise, if node is in neither the formatting
                                category nor the phrasing category, then this is a
                                parse error. Stop this algorithm. The end tag token
                                is ignored. */
                                $this->ignored = true;
                                break;
                                // parse error
                            }
                        }
                        /* Set node to the previous entry in the stack of open elements. Loop. */
                    }
                break;
            }
            break;
        }
        break;
    case self::IN_CDATA_RCDATA:
        /* Raw-text insertion mode. Text is appended to the current node;
         * EOF or any end tag pops the current node and restores the
         * insertion mode saved in $this->original_mode. Every other token
         * type falls through without any action. */
        $rcdata_type = $token['type'];
        $rcdata_is_text = in_array(
            $rcdata_type,
            array(HTML5_Tokenizer::CHARACTER, HTML5_Tokenizer::SPACECHARACTER),
            true
        );
        if ($rcdata_is_text) {
            $this->insertText($token['data']);
        } elseif (
            $rcdata_type === HTML5_Tokenizer::EOF ||
            $rcdata_type === HTML5_Tokenizer::ENDTAG
        ) {
            /* EOF in this mode is a parse error. A </script> end tag gets no
             * special handling because script execution is not implemented,
             * so it is treated exactly like any other end tag. */
            array_pop($this->stack);
            $this->mode = $this->original_mode;
            if ($rcdata_type === HTML5_Tokenizer::EOF) {
                /* Reprocess the EOF token under the restored mode. */
                $this->emitToken($token);
            }
        }
    break;
    case self::IN_TABLE:
        /* "In table" insertion mode. The branches below are tried in order;
         * the first one whose condition matches handles the token. */
        $clear = array('html', 'table');
        $tbl_type = $token['type'];
        $tbl_is_start = ($tbl_type === HTML5_Tokenizer::STARTTAG);
        $tbl_is_end = ($tbl_type === HTML5_Tokenizer::ENDTAG);

        if (
            $tbl_type === HTML5_Tokenizer::CHARACTER ||
            $tbl_type === HTML5_Tokenizer::SPACECHARACTER
        ) {
            /* Character data: start with an empty pending-text buffer,
             * remember the current mode, and reprocess the token in the
             * "in table text" mode. */
            $this->pendingTableCharacters = "";
            $this->pendingTableCharactersDirty = false;
            $this->original_mode = $this->mode;
            $this->mode = self::IN_TABLE_TEXT;
            $this->emitToken($token);
        } elseif ($tbl_type === HTML5_Tokenizer::COMMENT) {
            /* Comment: append a Comment node carrying the token data. */
            $this->insertComment($token['data']);
        } elseif ($tbl_type === HTML5_Tokenizer::DOCTYPE) {
            // parse error; the token is dropped
        } elseif ($tbl_is_start && $token['name'] === 'caption') {
            /* <caption>: clear back to a table context, push a marker onto
             * the active formatting list, insert the element and move to
             * "in caption". */
            $this->clearStackToTableContext($clear);
            $this->a_formatting[] = self::MARKER;
            $this->insertElement($token);
            $this->mode = self::IN_CAPTION;
        } elseif ($tbl_is_start && $token['name'] === 'colgroup') {
            /* <colgroup>: clear back to a table context, insert the element
             * and move to "in column group". */
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_COLUMN_GROUP;
        } elseif ($tbl_is_start && $token['name'] === 'col') {
            /* <col>: behave as if <colgroup> had been seen first, then
             * reprocess the token. */
            $tbl_implied = array(
                'name' => 'colgroup',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            );
            $this->emitToken($tbl_implied);
            $this->emitToken($token);
        } elseif ($tbl_is_start && in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
            /* <tbody>/<tfoot>/<thead>: clear back to a table context, insert
             * the element and move to "in table body". */
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_TABLE_BODY;
        } elseif ($tbl_is_start && in_array($token['name'], array('td', 'th', 'tr'))) {
            /* <td>/<th>/<tr>: behave as if <tbody> had been seen first, then
             * reprocess the token. */
            $tbl_implied = array(
                'name' => 'tbody',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            );
            $this->emitToken($tbl_implied);
            $this->emitToken($token);
        } elseif ($tbl_is_start && $token['name'] === 'table') {
            /* Nested <table>: parse error. Behave as if </table> had been
             * seen and reprocess the token unless that end tag was ignored. */
            $tbl_implied = array(
                'name' => 'table',
                'type' => HTML5_Tokenizer::ENDTAG
            );
            $this->emitToken($tbl_implied);
            if (!$this->ignored) {
                $this->emitToken($token);
            }
        } elseif ($tbl_is_end && $token['name'] === 'table') {
            if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                /* Pop up to and including the table element, then reset the
                 * insertion mode. */
                do {
                    $node = array_pop($this->stack);
                } while ($node->tagName !== 'table');
                $this->resetInsertionMode();
            } else {
                /* No table element in table scope (fragment case): parse
                 * error, the token is ignored. */
                $this->ignored = true;
            }
        } elseif ($tbl_is_end && in_array($token['name'], array(
            'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
            'tfoot', 'th', 'thead', 'tr'
        ))) {
            // Parse error. Ignore the token.
        } elseif ($tbl_is_start && ($token['name'] === 'style' || $token['name'] === 'script')) {
            /* <style>/<script>: handled with the "in head" rules. */
            $this->processWithRulesFor($token, self::IN_HEAD);
        } elseif (
            $tbl_is_start && $token['name'] === 'input' &&
            // the assignment to $type inside the condition is intentional
            ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden'
        ) {
            /* <input type="hidden"> (ASCII case-insensitive): parse error;
             * insert the element and pop it again straight away. Any other
             * <input> falls through to the "anything else" branch. */
            $this->insertElement($token);
            array_pop($this->stack);
        } elseif ($tbl_type === HTML5_Tokenizer::EOF) {
            /* A current node other than the root html element would be a
             * parse error; nothing is reported for it here. */
            if (end($this->stack)->tagName !== 'html') {
                // Note: It can only be the current node in the fragment case.
                // parse error
            }
            /* Stop parsing. */
        } else {
            /* Anything else: parse error. Handle the token with the
             * "in body" rules while foster parenting is switched on, then
             * put the previous flag value back. */
            $old = $this->foster_parent;
            $this->foster_parent = true;
            $this->processWithRulesFor($token, self::IN_BODY);
            $this->foster_parent = $old;
        }
    break;
    case self::IN_TABLE_TEXT:
        /* "In table text" insertion mode. Character data is buffered in
         * $this->pendingTableCharacters; the first token that is not
         * character data flushes the buffer, restores the original
         * insertion mode and is then reprocessed. */
        /* A character token */
        if($token['type'] === HTML5_Tokenizer::CHARACTER) {
            /* Append the character token to the pending table
             * character tokens list, and remember that the buffer now
             * holds something other than plain space characters. */
            $this->pendingTableCharacters .= $token['data'];
            $this->pendingTableCharactersDirty = true;
        } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Space characters are buffered without marking it dirty. */
            $this->pendingTableCharacters .= $token['data'];
        /* Anything else */
        } else {
            if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
                /* If any of the tokens in the pending table character tokens list 
                 * are character tokens that are not one of U+0009 CHARACTER 
                 * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or 
                 * U+0020 SPACE, then reprocess those character tokens using the 
                 * rules given in the "anything else" entry in the "in table" 
                 * insertion mode. */
                if ($this->pendingTableCharactersDirty) {
                    /* Parse error. Process the buffered text using the rules
                     * for the "in body" insertion mode with foster parenting
                     * switched on, then restore the previous flag value. */
                    // XERROR
                    $old = $this->foster_parent;
                    $this->foster_parent = true;
                    $text_token = array(
                        'type' => HTML5_Tokenizer::CHARACTER,
                        'data' => $this->pendingTableCharacters,
                    );
                    $this->processWithRulesFor($text_token, self::IN_BODY);
                    $this->foster_parent = $old;
                /* Otherwise, insert the characters given by the pending table 
                 * character tokens list into the current node. */
                } else {
                    $this->insertText($this->pendingTableCharacters);
                }
                /* The buffer has been flushed: clear it and reset the dirty
                 * flag. (This used to assign a misspelt, otherwise unused
                 * property and left the real flag untouched.) */
                $this->pendingTableCharacters = null;
                $this->pendingTableCharactersDirty = false;
            }
            /* Switch the insertion mode to the original insertion mode and 
             * reprocess the token.
             */
            $this->mode = $this->original_mode;
            $this->emitToken($token);
        }
    break;
    case self::IN_CAPTION:
        /* "In caption" insertion mode: closes the caption on </caption>,
         * implies </caption> for table-structure tokens, ignores stray
         * table-related end tags and handles everything else with the
         * "in body" rules. */
        /* An end tag whose tag name is "caption" */
        if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (fragment case) */
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                $this->ignored = true;
                // Ignore
            /* Otherwise: */
            } else {
                /* Generate implied end tags. */
                $this->generateImpliedEndTags();
                /* Now, if the current node is not a caption element, then this
                is a parse error. */
                // XERROR: implement
                /* Pop elements from this stack until a caption element has
                been popped from the stack. */
                do {
                    $node = array_pop($this->stack);
                } while ($node->tagName !== 'caption');
                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
                /* Switch the insertion mode to "in table". */
                $this->mode = self::IN_TABLE;
            }
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
        name is "table" */
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'table')) {
            /* Parse error. Act as if an end tag with the tag name "caption"
            had been seen, then, if that token wasn't ignored, reprocess the
            current token. */
            $this->emitToken(array(
                'name' => 'caption',
                'type' => HTML5_Tokenizer::ENDTAG
            ));
            if (!$this->ignored) $this->emitToken($token);
        /* An end tag whose tag name is one of: "body", "col", "colgroup",
        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
        array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) {
            // Parse error. Ignore the token.
            // "td" belongs in this list as well; without it a stray </td>
            // fell through to the "in body" rules instead of being ignored.
            $this->ignored = true;
        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->processWithRulesFor($token, self::IN_BODY);
        }
    break;
    case self::IN_COLUMN_GROUP:
        /* "In column group" insertion mode: accepts whitespace, comments and
         * <col> elements inside a colgroup; any other token implies
         * </colgroup> and is reprocessed unless that end tag was ignored. */
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token.
            insertComment() is the helper the "in table" mode uses for
            comment tokens; this branch previously called insertToken(). */
            $this->insertComment($token['data']);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            /* <html> start tag: handled with the "in body" rules. */
            $this->processWithRulesFor($token, self::IN_BODY);
        /* A start tag whose tag name is "col" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
            /* Insert a col element for the token. Immediately pop the current
            node off the stack of open elements. */
            $this->insertElement($token);
            array_pop($this->stack);
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
        /* An end tag whose tag name is "colgroup" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'colgroup') {
            /* If the current node is the root html element, then this is a
            parse error, ignore the token. (fragment case) */
            if(end($this->stack)->tagName === 'html') {
                $this->ignored = true;
            /* Otherwise, pop the current node (which will be a colgroup
            element) from the stack of open elements. Switch the insertion
            mode to "in table". */
            } else {
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }
        /* An end tag whose tag name is "col" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        /* An end-of-file token */
        /* If the current node is the root html  element */
        } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
            /* Stop parsing */
        /* Anything else */
        } else {
            /* Act as if an end tag with the tag name "colgroup" had been seen,
            and then, if that token wasn't ignored, reprocess the current token. */
            $this->emitToken(array(
                'name' => 'colgroup',
                'type' => HTML5_Tokenizer::ENDTAG
            ));
            if (!$this->ignored) $this->emitToken($token);
        }
    break;
    case self::IN_TABLE_BODY:
        /* "In table body" insertion mode (current section is a tbody,
         * tfoot or thead). Branches are tried in order. */
        $clear = array('tbody', 'tfoot', 'thead', 'html');
        $body_is_start = ($token['type'] === HTML5_Tokenizer::STARTTAG);
        $body_is_end = ($token['type'] === HTML5_Tokenizer::ENDTAG);

        if ($body_is_start && $token['name'] === 'tr') {
            /* <tr>: clear back to a table body context, insert the row and
             * move to "in row". */
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_ROW;
        } elseif ($body_is_start && in_array($token['name'], array('th', 'td'), true)) {
            /* <th>/<td> without a row: parse error. Behave as if <tr> had
             * been seen first, then reprocess the token. */
            $body_implied = array(
                'name' => 'tr',
                'type' => HTML5_Tokenizer::STARTTAG,
                'attr' => array()
            );
            $this->emitToken($body_implied);
            $this->emitToken($token);
        } elseif ($body_is_end && in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
            if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                /* Clear back to a table body context, pop the current node
                 * and return to "in table". */
                $this->clearStackToTableContext($clear);
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            } else {
                /* No matching element in table scope: parse error, the
                 * token is ignored. */
                $this->ignored = true;
            }
        } elseif (
            ($body_is_start && in_array(
                $token['name'],
                array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead')
            )) ||
            ($body_is_end && $token['name'] === 'table')
        ) {
            if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
                /* Clear back to a table body context, behave as if an end
                 * tag named after the current node had been seen, then
                 * reprocess the token. */
                $this->clearStackToTableContext($clear);
                $body_implied = array(
                    'name' => end($this->stack)->tagName,
                    'type' => HTML5_Tokenizer::ENDTAG
                );
                $this->emitToken($body_implied);
                $this->emitToken($token);
            } else {
                /* No tbody, thead or tfoot in table scope (fragment case):
                 * parse error, the token is ignored. */
                $this->ignored = true;
            }
        } elseif ($body_is_end && in_array(
            $token['name'],
            array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
        )) {
            /* Stray end tag: parse error, the token is ignored. */
            $this->ignored = true;
        } else {
            /* Anything else is handled with the "in table" rules. */
            $this->processWithRulesFor($token, self::IN_TABLE);
        }
    break;
    case self::IN_ROW:
        /* "In row" insertion mode (current row is a tr element). Branches
         * are tried in order. */
        $clear = array('tr', 'html');
        $row_is_start = ($token['type'] === HTML5_Tokenizer::STARTTAG);
        $row_is_end = ($token['type'] === HTML5_Tokenizer::ENDTAG);

        if ($row_is_start && in_array($token['name'], array('th', 'td'), true)) {
            /* <th>/<td>: clear back to a table row context, insert the cell,
             * move to "in cell" and push a marker onto the list of active
             * formatting elements. */
            $this->clearStackToTableContext($clear);
            $this->insertElement($token);
            $this->mode = self::IN_CELL;
            $this->a_formatting[] = self::MARKER;
        } elseif ($row_is_end && $token['name'] === 'tr') {
            if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                /* Clear back to a table row context, pop the current node
                 * (a tr element) and return to "in table body". */
                $this->clearStackToTableContext($clear);
                array_pop($this->stack);
                $this->mode = self::IN_TABLE_BODY;
            } else {
                /* No tr in table scope (fragment case): parse error, the
                 * token is ignored. */
                $this->ignored = true;
            }
        } elseif (
            ($row_is_start && in_array(
                $token['name'],
                array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')
            )) ||
            ($row_is_end && $token['name'] === 'table')
        ) {
            /* Behave as if </tr> had been seen, then reprocess the token
             * unless that end tag was ignored. */
            $row_implied = array(
                'name' => 'tr',
                'type' => HTML5_Tokenizer::ENDTAG
            );
            $this->emitToken($row_implied);
            if (!$this->ignored) {
                $this->emitToken($token);
            }
        } elseif ($row_is_end && in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
            if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                /* Behave as if </tr> had been seen, then reprocess the
                 * token unconditionally. */
                $row_implied = array(
                    'name' => 'tr',
                    'type' => HTML5_Tokenizer::ENDTAG
                );
                $this->emitToken($row_implied);
                $this->emitToken($token);
            } else {
                /* No matching section element in table scope: parse error,
                 * the token is ignored. */
                $this->ignored = true;
            }
        } elseif ($row_is_end && in_array(
            $token['name'],
            array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th')
        )) {
            /* Stray end tag: parse error, the token is ignored. */
            $this->ignored = true;
        } else {
            /* Anything else is handled with the "in table" rules. */
            $this->processWithRulesFor($token, self::IN_TABLE);
        }
    break;
    case self::IN_CELL:
        /* An end tag whose tag name is one of: "td", "th" */
        if($token['type'] === HTML5_Tokenizer::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as that of the token, then this is a
            parse error and the token must be ignored. */
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                $this->ignored = true;
            /* Otherwise: */
            } else {
                /* Generate implied end tags, except for elements with the same
                tag name as the token. */
                $this->generateImpliedEndTags(array($token['name']));
                /* Now, if the current node is not an element with the same tag
                name as the token, then this is a parse error. */
                // XERROR: Implement parse error code
                /* Pop elements from this stack until an element with the same
                tag name as the token has been popped from the stack. */
                do {
                    $node = array_pop($this->stack);
                } while ($node->tagName !== $token['name']);
                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
                /* Switch the insertion mode to "in row". (The current node
                will be a tr element at this point.) */
                $this->mode = self::IN_ROW;
            }
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
        "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'))) {
            /* If the stack of open elements does not have a td or th element
            in table scope, then this is a parse error; ignore the token.
            (fragment case) */
            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
                // parse error
                $this->ignored = true;
            /* Otherwise, close the cell (see below) and reprocess the current
            token. */
            } else {
                $this->closeCell();
                $this->emitToken($token);
            }
        /* An end tag whose tag name is one of: "body", "caption", "col",
        "colgroup", "html" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html'))) {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
        "thead", "tr" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
            /* If the stack of open elements does not have a td or th element
            in table scope, then this is a parse error; ignore the token.
            (innerHTML case) */
            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
                // Parse error
                $this->ignored = true;
            /* Otherwise, close the cell (see below) and reprocess the current
            token. */
            } else {
                $this->closeCell();
                $this->emitToken($token);
            }
        /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->processWithRulesFor($token, self::IN_BODY);
        }
    break;
    case self::IN_SELECT:
        /* Handle the token as follows: */
        /* A character token */
        if(
            $token['type'] === HTML5_Tokenizer::CHARACTER ||
            $token['type'] === HTML5_Tokenizer::SPACECHARACTER
        ) {
            /* Append the token's character to the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::INBODY);
        /* A start tag token whose tag name is "option" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'option') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if(end($this->stack)->tagName === 'option') {
                $this->emitToken(array(
                    'name' => 'option',
                    'type' => HTML5_Tokenizer::ENDTAG
                ));
            }
            /* Insert an HTML element for the token. */
            $this->insertElement($token);
        /* A start tag token whose tag name is "optgroup" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'optgroup') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if(end($this->stack)->tagName === 'option') {
                $this->emitToken(array(
                    'name' => 'option',
                    'type' => HTML5_Tokenizer::ENDTAG
                ));
            }
            /* If the current node is an optgroup element, act as if an end tag
            with the tag name "optgroup" had been seen. */
            if(end($this->stack)->tagName === 'optgroup') {
                $this->emitToken(array(
                    'name' => 'optgroup',
                    'type' => HTML5_Tokenizer::ENDTAG
                ));
            }
            /* Insert an HTML element for the token. */
            $this->insertElement($token);
        /* An end tag token whose tag name is "optgroup" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'optgroup') {
            /* First, if the current node is an option element, and the node
            immediately before it in the stack of open elements is an optgroup
            element, then act as if an end tag with the tag name "option" had
            been seen. */
            $elements_in_stack = count($this->stack);
            if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
            $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
                $this->emitToken(array(
                    'name' => 'option',
                    'type' => HTML5_Tokenizer::ENDTAG
                ));
            }
            /* If the current node is an optgroup element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            if(end($this->stack)->tagName === 'optgroup') {
                array_pop($this->stack);
            } else {
                // parse error
                $this->ignored = true;
            }
        /* An end tag token whose tag name is "option" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'option') {
            /* If the current node is an option element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            if(end($this->stack)->tagName === 'option') {
                array_pop($this->stack);
            } else {
                // parse error
                $this->ignored = true;
            }
        /* An end tag whose tag name is "select" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'select') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (fragment case) */
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                $this->ignored = true;
                // parse error
            /* Otherwise: */
            } else {
                /* Pop elements from the stack of open elements until a select
                element has been popped from the stack. */
                do {
                    $node = array_pop($this->stack);
                } while ($node->tagName !== 'select');
                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }
        /* A start tag whose tag name is "select" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
            /* Parse error. Act as if the token had been an end tag with the
            tag name "select" instead. */
            $this->emitToken(array(
                'name' => 'select',
                'type' => HTML5_Tokenizer::ENDTAG
            ));
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        ($token['name'] === 'input' || $token['name'] === 'keygen' ||  $token['name'] === 'textarea')) {
            // parse error
            $this->emitToken(array(
                'name' => 'select',
                'type' => HTML5_Tokenizer::ENDTAG
            ));
            $this->emitToken($token);
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
            $this->processWithRulesFor($token, self::IN_HEAD);
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            // XERROR: If the current node is not the root html element, then this is a parse error.
            /* Stop parsing */
        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        }
    break;
    case self::IN_SELECT_IN_TABLE:
        if($token['type'] === HTML5_Tokenizer::STARTTAG &&
        in_array($token['name'], array('caption', 'table', 'tbody',
        'tfoot', 'thead', 'tr', 'td', 'th'))) {
            // parse error
            $this->emitToken(array(
                'name' => 'select',
                'type' => HTML5_Tokenizer::ENDTAG,
            ));
            $this->emitToken($token);
        /* An end tag whose tag name is one of: "caption", "table", "tbody",
        "tfoot", "thead", "tr", "td", "th" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th')))  {
            /* Parse error. */
            // parse error
            /* If the stack of open elements has an element in table scope with
            the same tag name as that of the token, then act as if an end tag
            with the tag name "select" had been seen, and reprocess the token.
            Otherwise, ignore the token. */
            if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
                $this->emitToken(array(
                    'name' => 'select',
                    'type' => HTML5_Tokenizer::ENDTAG
                ));
                $this->emitToken($token);
            } else {
                $this->ignored = true;
            }
        } else {
            $this->processWithRulesFor($token, self::IN_SELECT);
        }
    break;
    case self::IN_FOREIGN_CONTENT:
        if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            $this->insertText($token['data']);
        } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
            $this->insertComment($token['data']);
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // XERROR: parse error
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
        // XDOM
        end($this->stack)->namespaceURI === self::NS_SVG) {
            array_pop($this->stack);
            // a bunch of script running mumbo jumbo
        } elseif (
            ($token['type'] === HTML5_Tokenizer::STARTTAG &&
                ((
                    $token['name'] !== 'mglyph' &&
                    $token['name'] !== 'malignmark' &&
                    // XDOM
                    end($this->stack)->namespaceURI === self::NS_MATHML &&
                    in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
                ) ||
                (
                    $token['name'] === 'svg' &&
                    // XDOM
                    end($this->stack)->namespaceURI === self::NS_MATHML &&
                    end($this->stack)->tagName === 'annotation-xml'
                ) ||
                (
                    // XDOM
                    end($this->stack)->namespaceURI === self::NS_SVG &&
                    in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
                ) ||
                (
                    // XSKETCHY && XDOM
                    end($this->stack)->namespaceURI === self::NS_HTML
                ))
            ) || $token['type'] === HTML5_Tokenizer::ENDTAG
        ) {
            $this->processWithRulesFor($token, $this->secondary_mode);
            /* If, after doing so, the insertion mode is still "in foreign 
             * content", but there is no element in scope that has a namespace 
             * other than the HTML namespace, switch the insertion mode to the 
             * secondary insertion mode. */
            if ($this->mode === self::IN_FOREIGN_CONTENT) {
                $found = false;
                // this basically duplicates elementInScope()
                for ($i = count($this->stack) - 1; $i >= 0; $i--) {
                    // XDOM
                    $node = $this->stack[$i];
                    if ($node->namespaceURI !== self::NS_HTML) {
                        $found = true;
                        break;
                    } elseif (in_array($node->tagName, array('table', 'html',
                    'applet', 'caption', 'td', 'th', 'button', 'marquee',
                    'object')) || ($node->tagName === 'foreignObject' &&
                    $node->namespaceURI === self::NS_SVG)) {
                        break;
                    }
                }
                if (!$found) {
                    $this->mode = $this->secondary_mode;
                }
            }
        } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
        $token['type'] === HTML5_Tokenizer::STARTTAG &&
        (in_array($token['name'], array('b', "big", "blockquote", "body", "br", 
        "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2", 
        "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", 
        "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s",  "small", 
        "span", "strong", "strike",  "sub", "sup", "table", "tt", "u", "ul", 
        "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
        $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
            // XERROR: parse error
            do {
                $node = array_pop($this->stack);
                // XDOM
            } while ($node->namespaceURI !== self::NS_HTML);
            $this->stack[] = $node;
            $this->mode = $this->secondary_mode;
            $this->emitToken($token);
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
            static $svg_lookup = array(
                'altglyph' => 'altGlyph',
                'altglyphdef' => 'altGlyphDef',
                'altglyphitem' => 'altGlyphItem',
                'animatecolor' => 'animateColor',
                'animatemotion' => 'animateMotion',
                'animatetransform' => 'animateTransform',
                'clippath' => 'clipPath',
                'feblend' => 'feBlend',
                'fecolormatrix' => 'feColorMatrix',
                'fecomponenttransfer' => 'feComponentTransfer',
                'fecomposite' => 'feComposite',
                'feconvolvematrix' => 'feConvolveMatrix',
                'fediffuselighting' => 'feDiffuseLighting',
                'fedisplacementmap' => 'feDisplacementMap',
                'fedistantlight' => 'feDistantLight',
                'feflood' => 'feFlood',
                'fefunca' => 'feFuncA',
                'fefuncb' => 'feFuncB',
                'fefuncg' => 'feFuncG',
                'fefuncr' => 'feFuncR',
                'fegaussianblur' => 'feGaussianBlur',
                'feimage' => 'feImage',
                'femerge' => 'feMerge',
                'femergenode' => 'feMergeNode',
                'femorphology' => 'feMorphology',
                'feoffset' => 'feOffset',
                'fepointlight' => 'fePointLight',
                'fespecularlighting' => 'feSpecularLighting',
                'fespotlight' => 'feSpotLight',
                'fetile' => 'feTile',
                'feturbulence' => 'feTurbulence',
                'foreignobject' => 'foreignObject',
                'glyphref' => 'glyphRef',
                'lineargradient' => 'linearGradient',
                'radialgradient' => 'radialGradient',
                'textpath' => 'textPath',
            );
            // XDOM
            $current = end($this->stack);
            if ($current->namespaceURI === self::NS_MATHML) {
                $token = $this->adjustMathMLAttributes($token);
            }
            if ($current->namespaceURI === self::NS_SVG &&
            isset($svg_lookup[$token['name']])) {
                $token['name'] = $svg_lookup[$token['name']];
            }
            if ($current->namespaceURI === self::NS_SVG) {
                $token = $this->adjustSVGAttributes($token);
            }
            $token = $this->adjustForeignAttributes($token);
            $this->insertForeignElement($token, $current->namespaceURI);
            if (isset($token['self-closing'])) {
                array_pop($this->stack);
                // XERROR: acknowledge self-closing flag
            }
        }
    break;
    case self::AFTER_BODY:
        /* Handle the token as follows: */
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Process the token as it would be processed if the insertion mode
            was "in body". */
            $this->processWithRulesFor($token, self::IN_BODY);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the first element in the stack of open
            elements (the html element), with the data attribute set to the
            data given in the comment token. */
            // XDOM
            $comment = $this->dom->createComment($token['data']);
            $this->stack[0]->appendChild($comment);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* An end tag with the tag name "html" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
            /*     If the parser was originally created as part of the HTML
             *     fragment parsing algorithm, this is a parse error; ignore
             *     the token. (fragment case) */
            $this->ignored = true;
            // XERROR: implement this
            $this->mode = self::AFTER_AFTER_BODY;
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            /* Stop parsing */
        /* Anything else */
        } else {
            /* Parse error. Set the insertion mode to "in body" and reprocess
            the token. */
            $this->mode = self::IN_BODY;
            $this->emitToken($token);
        }
    break;
    case self::IN_FRAMESET:
        /* Handle the token as follows: */
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        /* A start tag with the tag name "frameset" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'frameset') {
            $this->insertElement($token);
        /* An end tag with the tag name "frameset" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'frameset') {
            /* If the current node is the root html element, then this is a
            parse error; ignore the token. (fragment case) */
            if(end($this->stack)->tagName === 'html') {
                $this->ignored = true;
                // Parse error
            } else {
                /* Otherwise, pop the current node from the stack of open
                elements. */
                array_pop($this->stack);
                /* If the parser was not originally created as part of the HTML 
                 * fragment parsing algorithm  (fragment case), and the current 
                 * node is no longer a frameset element, then switch the 
                 * insertion mode to "after frameset". */
                $this->mode = self::AFTER_FRAMESET;
            }
        /* A start tag with the tag name "frame" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'frame') {
            /* Insert an HTML element for the token. */
            $this->insertElement($token);
            /* Immediately pop the current node off the stack of open elements. */
            array_pop($this->stack);
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
        /* A start tag with the tag name "noframes" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'noframes') {
            /* Process the token using the rules for the "in head" insertion mode. */
            $this->processwithRulesFor($token, self::IN_HEAD);
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            // XERROR: If the current node is not the root html element, then this is a parse error.
            /* Stop parsing */
        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        }
    break;
    case self::AFTER_FRAMESET:
        /* Handle the token as follows: */
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);
        /* A comment token */
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
            // parse error
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* An end tag with the tag name "html" */
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
        $token['name'] === 'html') {
            $this->mode = self::AFTER_AFTER_FRAMESET;
        /* A start tag with the tag name "noframes" */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
        $token['name'] === 'noframes') {
            $this->processWithRulesFor($token, self::IN_HEAD);
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            /* Stop parsing */
        /* Anything else */
        } else {
            /* Parse error. Ignore the token. */
            $this->ignored = true;
        }
    break;
    case self::AFTER_AFTER_BODY:
        /* A comment token */
        if($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            // XDOM
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* An end-of-file token */
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            /* OMG DONE!! */
        } else {
            // parse error
            $this->mode = self::IN_BODY;
            $this->emitToken($token);
        }
    break;
    case self::AFTER_AFTER_FRAMESET:
        /* A comment token */
        if($token['type'] === HTML5_Tokenizer::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            // XDOM
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
            $this->processWithRulesFor($token, self::IN_BODY);
        /* An end-of-file token */
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
            /* OMG DONE!! */
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
            $this->processWithRulesFor($token, self::IN_HEAD);
        } else {
            // parse error
        }
    break;
    }
        // end funky indenting
        }
    /**
     * Builds an element in the HTML namespace from a start-tag token and,
     * unless told otherwise, inserts it into the tree and opens it.
     *
     * @param array $token  Start tag token; reads $token['name'] and, when
     *                      present, $token['attr'] (a list of arrays with
     *                      'name' and 'value' keys).
     * @param bool  $append When true the element is appended via
     *                      appendToRealParent() and pushed onto the stack of
     *                      open elements; when false it is only created.
     * @return DOMElement The newly created element.
     */
    private function insertElement($token, $append = true) {
        $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

        $attributes = empty($token['attr']) ? array() : $token['attr'];
        foreach ($attributes as $attribute) {
            // The first occurrence of an attribute name wins; later
            // duplicates on the same tag are dropped.
            if ($element->hasAttribute($attribute['name'])) {
                continue;
            }
            $element->setAttribute($attribute['name'], $attribute['value']);
        }

        if (!$append) {
            return $element;
        }

        $this->appendToRealParent($element);
        $this->stack[] = $element;
        return $element;
    }
    /**
     * Appends character data to the tree as a new text node.
     *
     * Empty input is ignored. While $this->ignore_lf_token is set, a single
     * leading line feed is stripped first; if nothing remains after that, no
     * node is created.
     *
     * @param string $data Character data from a character token.
     * @return void
     */
    private function insertText($data) {
        if ($data === '') return;
        if ($this->ignore_lf_token && $data[0] === "\n") {
            $data = substr($data, 1);
            // substr() yields false on PHP < 8.0 but '' on PHP >= 8.0 when
            // the string consisted solely of the line feed. Check both so
            // that no empty text node is appended on either version.
            if ($data === false || $data === '') return;
        }
        $text = $this->dom->createTextNode($data);
        $this->appendToRealParent($text);
    }
    /**
     * Creates a comment node holding $data and inserts it at the current
     * insertion point (via appendToRealParent()).
     *
     * @param string $data Comment text taken from a comment token.
     * @return void
     */
    private function insertComment($data) {
        $this->appendToRealParent($this->dom->createComment($data));
    }
    /**
     * Inserts $node into the tree at the appropriate parent.
     *
     * Normally that is the current node (the last entry on the stack of open
     * elements). When foster parenting is enabled and the current node is a
     * table, tbody, tfoot, thead or tr element, the node is handed to
     * fosterParent() instead.
     *
     * @param DOMNode $node Node to insert.
     * @return void
     */
    private function appendToRealParent($node) {
        $current = end($this->stack);

        // Only the table-structure elements trigger foster parenting, and
        // only while the foster_parent flag is switched on.
        $mustFoster = $this->foster_parent && in_array(
            $current->tagName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr')
        );

        if ($mustFoster) {
            $this->fosterParent($node);
            return;
        }

        $current->appendChild($node);
    }
    /**
     * Tests whether the stack of open elements has an element with the given
     * tag name in the requested scope.
     *
     * The stack is walked from the current node (last entry) towards the
     * root. The walk ends with a match as soon as a node with the target tag
     * name is found, and ends without a match when a scope boundary element
     * is met first. Which elements act as boundaries depends on $scope:
     *  - every scope: table, html
     *  - every scope except self::SCOPE_TABLE: applet, caption, td, th,
     *    button, marquee, object, and SVG foreignObject
     *  - self::SCOPE_LISTITEM only: additionally ol, ul
     *
     * @param string|array $el    Tag name, or list of tag names of which any
     *                            one may match.
     * @param int          $scope One of the self::SCOPE* constants.
     * @return bool True if a matching element is in scope, false otherwise
     *              (including when the stack is empty or is exhausted
     *              without meeting a boundary).
     */
    private function elementInScope($el, $scope = self::SCOPE) {
        if(is_array($el)) {
            foreach($el as $element) {
                if($this->elementInScope($element, $scope)) {
                    return true;
                }
            }
            return false;
        }

        $generalBoundaries = array('applet', 'caption', 'td', 'th', 'button',
            'marquee', 'object');
        $listItemBoundaries = array('ol', 'ul');

        for($i = count($this->stack) - 1; $i >= 0; $i--) {
            $node = $this->stack[$i];
            $tagName = $node->tagName;

            /* If node is the target node, terminate in a match state. */
            if($tagName === $el) {
                return true;
            }

            // Boundaries shared by every scope.
            if($tagName === 'table' || $tagName === 'html') {
                return false;
            }

            // Boundaries for "in scope" and "in list item scope" (i.e.
            // everything but table scope).
            if($scope !== self::SCOPE_TABLE && (
                in_array($tagName, $generalBoundaries, true) ||
                ($tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)
            )) {
                return false;
            }

            // Extra boundaries for "in list item scope".
            if($scope === self::SCOPE_LISTITEM && in_array($tagName, $listItemBoundaries, true)) {
                return false;
            }

            /* Otherwise, continue with the previous entry in the stack of
            open elements. */
        }

        // Stack exhausted (or empty) without a match or a boundary. Return
        // an explicit boolean instead of falling off the end with null.
        return false;
    }
    /**
     * Reconstructs the active formatting elements: re-opens (as shallow
     * clones) every formatting element that follows the last marker / last
     * still-open entry in the list of active formatting elements but is no
     * longer on the stack of open elements.
     *
     * Each clone is inserted via appendToRealParent(), pushed onto the stack
     * of open elements, and replaces its original in $this->a_formatting.
     *
     * NOTE(review): indexes $this->a_formatting numerically, so it assumes
     * the list is kept as a contiguous 0-based array - confirm against the
     * code that removes entries.
     *
     * @return false|null false when there was nothing to reconstruct; no
     *                    explicit value otherwise.
     */
    private function reconstructActiveFormattingElements() {
        /* 1. If there are no entries in the list of active formatting elements,
        then there is nothing to reconstruct; stop this algorithm. */
        $formatting_elements = count($this->a_formatting);
        if($formatting_elements === 0) {
            return false;
        }

        /* 3. Let entry be the last (most recently added) element in the list
        of active formatting elements. */
        $entry = end($this->a_formatting);

        /* 2. If the last (most recently added) entry in the list of active
        formatting elements is a marker, or if it is an element that is in the
        stack of open elements, then there is nothing to reconstruct; stop this
        algorithm. */
        if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            return false;
        }

        $a = $formatting_elements - 1;
        // Whether step 7 (advance to the next entry) has to run before the
        // next clone is made. It stays false only when the rewind reaches the
        // very first entry of the list (step 4 jumps straight to step 8).
        $step_seven = false;

        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        while($a > 0) {
            /* 5. Let entry be the entry one earlier than entry in the list of
            active formatting elements. */
            $a--;
            $entry = $this->a_formatting[$a];

            /* 6. If entry is neither a marker nor an element that is also in
            the stack of open elements, go to step 4. */
            if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
                // Rewound one entry too far: this entry must NOT be cloned.
                // Fall through to step 7 so the algorithm advances past it.
                // (Previously the flag was left unset here, so the marker or
                // still-open element itself was cloned.)
                $step_seven = true;
                break;
            }
        }

        while(true) {
            /* 7. Let entry be the element one later than entry in the list of
            active formatting elements. */
            if($step_seven) {
                $a++;
                $entry = $this->a_formatting[$a];
            }

            /* 8. Perform a shallow clone of the element entry to obtain clone. */
            $clone = $entry->cloneNode();

            /* 9. Append clone to the current node and push it onto the stack
            of open elements so that it is the new current node. */
            $this->appendToRealParent($clone);
            $this->stack[] = $clone;

            /* 10. Replace the entry for entry in the list with an entry for
            clone. */
            $this->a_formatting[$a] = $clone;

            /* 11. If the entry for clone in the list of active formatting
            elements is not the last entry in the list, return to step 7. */
            if(end($this->a_formatting) === $clone) {
                break;
            }
            $step_seven = true;
        }
    }
    /**
     * Clears the list of active formatting elements up to the last marker.
     *
     * Entries are removed from the end of $this->a_formatting until a
     * self::MARKER entry has been removed. If the list runs out before a
     * marker is found the method stops instead of looping forever.
     */
    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
        /* When the steps below require the UA to clear the list of active
        formatting elements up to the last marker, the UA must perform the
        following steps: */
        while (!empty($this->a_formatting)) {
            /* 1. Let entry be the last (most recently added) entry in the list
            of active formatting elements.
               2. Remove entry from the list of active formatting elements. */
            $entry = array_pop($this->a_formatting);
            /* 3. If entry was a marker, then stop the algorithm at this point.
            The list has been cleared up to the last marker. */
            if ($entry === self::MARKER) {
                break;
            }
        }
    }
    /**
     * Pops elements with implied end tags off the stack of open elements.
     *
     * While the current node (the last entry of $this->stack) has one of the
     * tag names dc, dd, ds, dt, li, p, td, th or tr, and that name is not
     * listed in $exclude, the node is popped. Popping also stops when the
     * stack becomes empty.
     *
     * NOTE(review): the tag list implemented here is not the same as the one
     * in the spec text this method was written against (which names option,
     * optgroup, rp and rt rather than td, th and tr) -- confirm which list is
     * intended before changing it.
     *
     * @param array $exclude tag names that must not be popped
     */
    private function generateImpliedEndTags($exclude = array()) {
        $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
        // end() returns false on an empty stack; stop there rather than
        // reading a property of a non-object.
        while (($node = end($this->stack)) !== false && in_array($node->tagName, $elements, true)) {
            array_pop($this->stack);
        }
    }
    /**
     * Classifies a node by its tag name.
     *
     * The tag name is looked up, in this order, in $this->special,
     * $this->scoping and $this->formatting; the first list containing it
     * decides the result. Anything else is treated as phrasing.
     *
     * @param object $node node whose tagName is inspected
     * @return mixed one of self::SPECIAL, self::SCOPING, self::FORMATTING
     *               or self::PHRASING
     */
    private function getElementCategory($node) {
        // Debugging aid: dump a backtrace when something that is not an
        // object was passed in.
        if (!is_object($node)) {
            debug_print_backtrace();
        }
        $tag = $node->tagName;
        if (in_array($tag, $this->special)) {
            return self::SPECIAL;
        }
        if (in_array($tag, $this->scoping)) {
            return self::SCOPING;
        }
        if (in_array($tag, $this->formatting)) {
            return self::FORMATTING;
        }
        return self::PHRASING;
    }
    /**
     * Clears the stack of open elements back to a table context.
     *
     * Elements are popped from the end of $this->stack until the current
     * node's tag name is one of $elements.
     *
     * @param array $elements tag names at which popping stops
     */
    private function clearStackToTableContext($elements) {
        /* When the steps above require the UA to clear the stack back to a
        table context, it means that the UA must, while the current node is not
        a table element or an html element, pop elements from the stack of open
        elements. */
        while (!in_array(end($this->stack)->tagName, $elements)) {
            array_pop($this->stack);
        }
    }
    /**
     * Resets the insertion mode appropriately.
     *
     * Walks the stack of open elements from the most recently pushed node
     * down to the first one and sets $this->mode from the first node whose
     * tag name (or namespace) matches one of the cases below. When the walk
     * reaches the first stack entry, the context element is examined in its
     * place, and "in body" is used if that does not match either.
     */
    private function resetInsertionMode() {
        /* 1. Let last be false. */
        $last = false;
        $leng = count($this->stack);
        for($n = $leng - 1; $n >= 0; $n--) {
            /* 2. Let node be the last node in the stack of open elements. */
            $node = $this->stack[$n];
            /* 3. If node is the first node in the stack of open elements, then 
             * set last to true and set node to the context  element. (fragment 
             * case) */
            // NOTE(review): the substitution is made whether or not a context
            // element has been set -- confirm what $this->context holds
            // outside the fragment case.
            if($this->stack[0]->isSameNode($node)) {
                $last = true;
                $node = $this->context;
            }
            /* 4. If node is a select element, then switch the insertion mode to
            "in select" and abort these steps. (fragment case) */
            if($node->tagName === 'select') {
                $this->mode = self::IN_SELECT;
                break;
            /* 5. If node is a td or th element, then switch the insertion mode
            to "in cell" and abort these steps. */
            // NOTE(review): the th test reads nodeName while every other test
            // reads tagName -- presumably equivalent for elements; confirm.
            } elseif($node->tagName === 'td' || $node->nodeName === 'th') {
                $this->mode = self::IN_CELL;
                break;
            /* 6. If node is a tr element, then switch the insertion mode to
            "in row" and abort these steps. */
            } elseif($node->tagName === 'tr') {
                $this->mode = self::IN_ROW;
                break;
            /* 7. If node is a tbody, thead, or tfoot element, then switch the
            insertion mode to "in table body" and abort these steps. */
            } elseif(in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) {
                $this->mode = self::IN_TABLE_BODY;
                break;
            /* 8. If node is a caption element, then switch the insertion mode
            to "in caption" and abort these steps. */
            } elseif($node->tagName === 'caption') {
                $this->mode = self::IN_CAPTION;
                break;
            /* 9. If node is a colgroup element, then switch the insertion mode
            to "in column group" and abort these steps. (innerHTML case) */
            } elseif($node->tagName === 'colgroup') {
                $this->mode = self::IN_COLUMN_GROUP;
                break;
            /* 10. If node is a table element, then switch the insertion mode
            to "in table" and abort these steps. */
            } elseif($node->tagName === 'table') {
                $this->mode = self::IN_TABLE;
                break;
            /* 11. If node is an element from the MathML namespace or the SVG 
             * namespace, then switch the insertion mode to "in foreign 
             * content", let the secondary insertion mode be "in body", and 
             * abort these steps. */
            } elseif($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
                $this->mode = self::IN_FOREIGN_CONTENT;
                $this->secondary_mode = self::IN_BODY;
                break;
            /* 12. If node is a head element, then switch the insertion mode
            to "in body" ("in body"! not "in head"!) and abort these steps.
            (fragment case) */
            } elseif($node->tagName === 'head') {
                $this->mode = self::IN_BODY;
                break;
            /* 13. If node is a body element, then switch the insertion mode to
            "in body" and abort these steps. */
            } elseif($node->tagName === 'body') {
                $this->mode = self::IN_BODY;
                break;
            /* 14. If node is a frameset element, then switch the insertion
            mode to "in frameset" and abort these steps. (fragment case) */
            } elseif($node->tagName === 'frameset') {
                $this->mode = self::IN_FRAMESET;
                break;
            /* 15. If node is an html element, then: if the head element
            pointer is null, switch the insertion mode to "before head",
            otherwise, switch the insertion mode to "after head". In either
            case, abort these steps. (fragment case) */
            } elseif($node->tagName === 'html') {
                $this->mode = ($this->head_pointer === null)
                    ? self::BEFORE_HEAD
                    : self::AFTER_HEAD;
                break;
            /* 16. If last is true, then set the insertion mode to "in body"
            and abort these steps. (fragment case) */
            } elseif($last) {
                $this->mode = self::IN_BODY;
                break;
            }
            // No case matched and this was not the first stack entry: the
            // loop continues with the next node further down the stack.
        }
    }
    /**
     * Closes the currently open table cell, if there is one.
     *
     * Checks td first, then th; for the first of the two that is in table
     * scope, an end tag token with that name is emitted and nothing further
     * is done.
     */
    private function closeCell() {
        /* If the stack of open elements has a td or th element in table scope,
        then act as if an end tag token with that tag name had been seen. */
        $cellNames = array('td', 'th');
        foreach ($cellNames as $tagName) {
            if (!$this->elementInScope($tagName, self::SCOPE_TABLE)) {
                continue;
            }
            $endTag = array(
                'name' => $tagName,
                'type' => HTML5_Tokenizer::ENDTAG
            );
            $this->emitToken($endTag);
            return;
        }
    }
    /**
     * Processes a token using the rules of the given insertion mode.
     *
     * This simply forwards to emitToken() with the mode passed explicitly.
     *
     * @param mixed $token token to process
     * @param mixed $mode  insertion mode whose rules should be applied
     * @return mixed whatever emitToken() returns
     */
    private function processWithRulesFor($token, $mode) {
        /* "using the rules for the m insertion mode", where m is one of these
         * modes, the user agent must use the rules described under the m
         * insertion mode's section, but must leave the insertion mode
         * unchanged unless the rules in m themselves switch the insertion mode
         * to a new value. */
        $result = $this->emitToken($token, $mode);
        return $result;
    }
    /**
     * Inserts an element for the token and switches to CDATA parsing.
     *
     * The insertion mode in effect after the element has been inserted is
     * remembered in $this->original_mode before the mode is changed to
     * self::IN_CDATA_RCDATA.
     *
     * @param mixed $token token passed on to insertElement()
     */
    private function insertCDATAElement($token) {
        $this->insertElement($token);
        $previousMode = $this->mode;
        $this->content_model = HTML5_Tokenizer::CDATA;
        $this->original_mode = $previousMode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
    /**
     * Inserts an element for the token and switches to RCDATA parsing.
     *
     * The insertion mode in effect after the element has been inserted is
     * remembered in $this->original_mode before the mode is changed to
     * self::IN_CDATA_RCDATA.
     *
     * @param mixed $token token passed on to insertElement()
     */
    private function insertRCDATAElement($token) {
        $this->insertElement($token);
        $previousMode = $this->mode;
        $this->content_model = HTML5_Tokenizer::RCDATA;
        $this->original_mode = $previousMode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
    /**
     * Looks up an attribute value on a token.
     *
     * @param array  $token token whose 'attr' entry holds name/value pairs
     * @param string $key   attribute name to look for (compared with ===)
     * @return mixed the value of the last pair named $key, or false when the
     *               token has no 'attr' entry or no pair matches
     */
    private function getAttr($token, $key) {
        if (!isset($token['attr'])) {
            return false;
        }
        $value = false;
        foreach ($token['attr'] as $attribute) {
            if ($attribute['name'] !== $key) {
                continue;
            }
            // Keep scanning: a later pair with the same name wins.
            $value = $attribute['value'];
        }
        return $value;
    }
    /**
     * Returns the current table.
     *
     * @return object the last element in the stack of open elements whose
     *                tag name is 'table', or the first stack entry when
     *                there is none
     */
    private function getCurrentTable() {
        /* The current table is the last table  element in the stack of open 
         * elements, if there is one. If there is no table element in the stack 
         * of open elements (fragment case), then the current table is the 
         * first element in the stack of open elements (the html element). */
        $index = count($this->stack);
        while ($index-- > 0) {
            $candidate = $this->stack[$index];
            if ($candidate->tagName === 'table') {
                return $candidate;
            }
        }
        return $this->stack[0];
    }
    /**
     * Determines the foster parent element.
     *
     * - No table element in the stack of open elements: the first stack
     *   entry is returned.
     * - The last table element has a parent that is an element node: that
     *   parent is returned.
     * - The last table element has no parent, or its parent is not an
     *   element node: the stack entry just before the table is returned
     *   (or the first stack entry if the table is itself the first entry).
     *
     * @return object the foster parent element
     */
    private function getFosterParent() {
        /* The foster parent element is the parent element of the last
        table element in the stack of open elements, if there is a
        table element and it has such a parent element. If there is no
        table element in the stack of open elements (innerHTML case),
        then the foster parent element is the first element in the
        stack of open elements (the html  element). Otherwise, if there
        is a table element in the stack of open elements, but the last
        table element in the stack of open elements has no parent, or
        its parent node is not an element, then the foster parent
        element is the element before the last table element in the
        stack of open elements. */
        $table = null;
        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            if ($this->stack[$n]->tagName === 'table') {
                $table = $this->stack[$n];
                break;
            }
        }
        if ($table === null) {
            return $this->stack[0];
        }
        $parent = $table->parentNode;
        // Only an element parent qualifies; a missing or non-element parent
        // falls through to the entry before the table.
        if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
            return $parent;
        }
        // $n is the table's position in the stack; guard against the table
        // being the very first entry so no undefined index is read.
        return $n > 0 ? $this->stack[$n - 1] : $this->stack[0];
    }
    /**
     * Foster parents a node.
     *
     * The node is inserted immediately before the current table when that
     * table is a table element whose parent is the foster parent element;
     * in every other case (including a table with no parent) the node is
     * appended to the foster parent element.
     *
     * @param object $node node to insert
     */
    public function fosterParent($node) {
        $foster_parent = $this->getFosterParent();
        $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
        /* When a node node is to be foster parented, the node node must be
         * be inserted into the foster parent element. */
        /* If the foster parent element is the parent element of the last table 
         * element in the stack of open elements, then node must be inserted 
         * immediately before the last table element in the stack of open 
         * elements in the foster parent element; otherwise, node must be 
         * appended to the foster parent element. */
        $table_parent = $table->parentNode;
        // A table without a parent cannot be a child of the foster parent, so
        // check for null before calling isSameNode() on it.
        if ($table->tagName === 'table'
            && $table_parent !== null
            && $table_parent->isSameNode($foster_parent)
        ) {
            $foster_parent->insertBefore($node, $table);
        } else {
            $foster_parent->appendChild($node);
        }
    }
    /**
     * Debugging helper: echoes the tag names of the stack of open elements,
     * first entry to last, on a single line.
     */
    private function printStack() {
        $tagNames = array();
        foreach ($this->stack as $openElement) {
            $tagNames[] = $openElement->tagName;
        }
        echo '  -> stack [', implode(', ', $tagNames), "]\n";
    }
    /**
     * Debugging helper: echoes the list of active formatting elements on a
     * single line, showing markers as 'MARKER' and elements by tag name.
     * Prints nothing when the list is empty.
     */
    private function printActiveFormattingElements() {
        if (!$this->a_formatting) {
            return;
        }
        $labels = array();
        foreach ($this->a_formatting as $entry) {
            $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
        }
        echo '  -> active formatting [', implode(', ', $labels), "]\n";
    }
    /**
     * Reports whether the current table has been marked as tainted.
     *
     * @return bool true when the node returned by getCurrentTable() has a
     *              non-empty 'tainted' property
     */
    public function currentTableIsTainted() {
        $table = $this->getCurrentTable();
        return !empty($table->tainted);
    }
    /**
     * Prepares the tree constructor for parsing a fragment.
     *
     * The fragment flag is always set. When a context tag name is given, a
     * context element is created, the content model is chosen from its tag
     * name, a fresh html root is appended to the document and becomes the
     * only entry on the stack of open elements, the insertion mode is reset,
     * and the form pointer is set to the nearest form element found walking
     * up from the context element.
     *
     * @param string|null $context tag name of the context element, if any
     */
    public function setupContext($context = null) {
        $this->fragment = true;
        if (!$context) {
            return;
        }
        $this->context = $this->dom->createElementNS(self::NS_HTML, $context);
        /* 4.1. Set the HTML parser's tokenization  stage's content model
         * flag according to the context element, as follows: */
        $contextTag = $this->context->tagName;
        $rcdataTags = array('title', 'textarea');
        // XSCRIPT: noscript is treated as CDATA, assuming scripting is enabled
        $cdataTags = array(
            'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes',
            'noscript'
        );
        if (in_array($contextTag, $rcdataTags)) {
            $this->content_model = HTML5_Tokenizer::RCDATA;
        } elseif (in_array($contextTag, $cdataTags)) {
            $this->content_model = HTML5_Tokenizer::CDATA;
        } elseif ($contextTag === 'plaintext') {
            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
        }
        /* 4.2. Let root be a new html element with no attributes. */
        $htmlRoot = $this->dom->createElementNS(self::NS_HTML, 'html');
        $this->root = $htmlRoot;
        /* 4.3 Append the element root to the Document node created above. */
        $this->dom->appendChild($htmlRoot);
        /* 4.4 Set up the parser's stack of open elements so that it 
         * contains just the single element root. */
        $this->stack = array($htmlRoot);
        /* 4.5 Reset the parser's insertion mode appropriately. */
        $this->resetInsertionMode();
        /* 4.6 Set the parser's form element pointer  to the nearest node 
         * to the context element that is a form element (going straight up 
         * the ancestor chain, and including the element itself, if it is a 
         * form element), or, if there is no such form element, to null. */
        for ($ancestor = $this->context; $ancestor; $ancestor = $ancestor->parentNode) {
            if ($ancestor->tagName === 'form') {
                $this->form_pointer = $ancestor;
                break;
            }
        }
    }
    /**
     * Adjusts MathML attribute names on a token.
     *
     * Every attribute named 'definitionurl' is renamed to 'definitionURL'.
     * A token with a missing or empty 'attr' entry is returned unchanged.
     *
     * @param array $token token whose 'attr' entry holds name/value pairs
     * @return array the token with adjusted attribute names
     */
    public function adjustMathMLAttributes($token) {
        if (empty($token['attr'])) {
            return $token;
        }
        foreach ($token['attr'] as &$kp) {
            if ($kp['name'] === 'definitionurl') {
                $kp['name'] = 'definitionURL';
            }
        }
        // Drop the reference so the last attribute is not left aliased.
        unset($kp);
        return $token;
    }
    /**
     * Adjusts SVG attribute names on a token.
     *
     * Attribute names found as keys in the lookup table below are replaced
     * by the mixed-case spelling stored as the value; other attributes are
     * left alone. A token with a missing or empty 'attr' entry is returned
     * unchanged.
     *
     * @param array $token token whose 'attr' entry holds name/value pairs
     * @return array the token with adjusted attribute names
     */
    public function adjustSVGAttributes($token) {
        static $lookup = array(
            'attributename' => 'attributeName',
            'attributetype' => 'attributeType',
            'basefrequency' => 'baseFrequency',
            'baseprofile' => 'baseProfile',
            'calcmode' => 'calcMode',
            'clippathunits' => 'clipPathUnits',
            'contentscripttype' => 'contentScriptType',
            'contentstyletype' => 'contentStyleType',
            'diffuseconstant' => 'diffuseConstant',
            'edgemode' => 'edgeMode',
            'externalresourcesrequired' => 'externalResourcesRequired',
            'filterres' => 'filterRes',
            'filterunits' => 'filterUnits',
            'glyphref' => 'glyphRef',
            'gradienttransform' => 'gradientTransform',
            'gradientunits' => 'gradientUnits',
            'kernelmatrix' => 'kernelMatrix',
            'kernelunitlength' => 'kernelUnitLength',
            'keypoints' => 'keyPoints',
            'keysplines' => 'keySplines',
            'keytimes' => 'keyTimes',
            'lengthadjust' => 'lengthAdjust',
            'limitingconeangle' => 'limitingConeAngle',
            'markerheight' => 'markerHeight',
            'markerunits' => 'markerUnits',
            'markerwidth' => 'markerWidth',
            'maskcontentunits' => 'maskContentUnits',
            'maskunits' => 'maskUnits',
            'numoctaves' => 'numOctaves',
            'pathlength' => 'pathLength',
            'patterncontentunits' => 'patternContentUnits',
            'patterntransform' => 'patternTransform',
            'patternunits' => 'patternUnits',
            'pointsatx' => 'pointsAtX',
            'pointsaty' => 'pointsAtY',
            'pointsatz' => 'pointsAtZ',
            'preservealpha' => 'preserveAlpha',
            'preserveaspectratio' => 'preserveAspectRatio',
            'primitiveunits' => 'primitiveUnits',
            'refx' => 'refX',
            'refy' => 'refY',
            'repeatcount' => 'repeatCount',
            'repeatdur' => 'repeatDur',
            'requiredextensions' => 'requiredExtensions',
            'requiredfeatures' => 'requiredFeatures',
            'specularconstant' => 'specularConstant',
            'specularexponent' => 'specularExponent',
            'spreadmethod' => 'spreadMethod',
            'startoffset' => 'startOffset',
            'stddeviation' => 'stdDeviation',
            'stitchtiles' => 'stitchTiles',
            'surfacescale' => 'surfaceScale',
            'systemlanguage' => 'systemLanguage',
            'tablevalues' => 'tableValues',
            'targetx' => 'targetX',
            'targety' => 'targetY',
            'textlength' => 'textLength',
            'viewbox' => 'viewBox',
            'viewtarget' => 'viewTarget',
            'xchannelselector' => 'xChannelSelector',
            'ychannelselector' => 'yChannelSelector',
            'zoomandpan' => 'zoomAndPan',
        );
        if (empty($token['attr'])) {
            return $token;
        }
        foreach ($token['attr'] as &$kp) {
            if (isset($lookup[$kp['name']])) {
                $kp['name'] = $lookup[$kp['name']];
            }
        }
        // Drop the reference so the last attribute is not left aliased.
        unset($kp);
        return $token;
    }
    /**
     * Adjusts foreign (xlink / xml / xmlns) attributes on a token.
     *
     * Attribute names found as keys in the lookup table below are replaced
     * by a three-element array; insertForeignElement() reads index 1 as the
     * attribute name and index 2 as the namespace (index 0 looks like the
     * prefix). Other attributes are left alone. A token with a missing or
     * empty 'attr' entry is returned unchanged.
     *
     * @param array $token token whose 'attr' entry holds name/value pairs
     * @return array the token with adjusted attribute names
     */
    public function adjustForeignAttributes($token) {
        static $lookup = array(
            'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
            'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
            'xlink:href' => array('xlink', 'href', self::NS_XLINK),
            'xlink:role' => array('xlink', 'role', self::NS_XLINK),
            'xlink:show' => array('xlink', 'show', self::NS_XLINK),
            'xlink:title' => array('xlink', 'title', self::NS_XLINK),
            'xlink:type' => array('xlink', 'type', self::NS_XLINK),
            'xml:base' => array('xml', 'base', self::NS_XML),
            'xml:lang' => array('xml', 'lang', self::NS_XML),
            'xml:space' => array('xml', 'space', self::NS_XML),
            'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
            'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
        );
        if (empty($token['attr'])) {
            return $token;
        }
        foreach ($token['attr'] as &$kp) {
            if (isset($lookup[$kp['name']])) {
                $kp['name'] = $lookup[$kp['name']];
            }
        }
        // Drop the reference so the last attribute is not left aliased.
        unset($kp);
        return $token;
    }
    /**
     * Creates an element in the given namespace, copies the token's
     * attributes onto it, appends it via appendToRealParent() and pushes it
     * onto the stack of open elements.
     *
     * An attribute name may be a plain string or an array, in which case
     * index 1 is used as the name and index 2 as the namespace. An attribute
     * the element already has in that namespace is skipped.
     *
     * @param array  $token        token with 'name' and optional 'attr'
     * @param string $namespaceURI namespace to create the element in
     */
    public function insertForeignElement($token, $namespaceURI) {
        $el = $this->dom->createElementNS($namespaceURI, $token['name']);
        if (!empty($token['attr'])) {
            foreach ($token['attr'] as $pair) {
                $name = $pair['name'];
                $ns = self::NS_HTML;
                if (is_array($name)) {
                    $ns = $name[2];
                    $name = $name[1];
                }
                if ($el->hasAttributeNS($ns, $name)) {
                    continue;
                }
                if ($ns === self::NS_XLINK) {
                    // XSKETCHY: work around godawful libxml bug
                    $el->setAttribute('xlink:'.$name, $pair['value']);
                } elseif ($ns === self::NS_HTML) {
                    // Another godawful libxml bug
                    $el->setAttribute($name, $pair['value']);
                } else {
                    $el->setAttributeNS($ns, $name, $pair['value']);
                }
            }
        }
        $this->appendToRealParent($el);
        $this->stack[] = $el;
        // XERROR: see below
        /* If the newly created element has an xmlns attribute in the XMLNS 
         * namespace  whose value is not exactly the same as the element's 
         * namespace, that is a parse error. Similarly, if the newly created 
         * element has an xmlns:xlink attribute in the XMLNS namespace whose 
         * value is not the XLink Namespace, that is a parse error. */
    }
    /**
     * Normalizes the document and returns the parse result.
     *
     * @return mixed $this->dom when not parsing a fragment; otherwise the
     *               child nodes of $this->root, or of $this->dom when no
     *               root is set
     */
    public function save() {
        $this->dom->normalize();
        if (!$this->fragment) {
            return $this->dom;
        }
        $container = $this->root ? $this->root : $this->dom;
        return $container->childNodes;
    }
 }
--- a/thirdparty/html5lib/HTML5/named-character-references.ser
+++ b/thirdparty/html5lib/HTML5/named-character-references.ser
--- a/thirdparty/html5lib/LICENSE
+++ b/thirdparty/html5lib/LICENSE
@ -1,22 +0,0 @@
 Copyright (c) 2006-2011 The Authors
 Contributors:
 James Graham - jg307@cam.ac.uk
 Anne van Kesteren - annevankesteren@gmail.com
 Lachlan Hunt - lachlan.hunt@lachy.id.au
 Matt McDonald - kanashii@kanashii.ca
 Sam Ruby - rubys@intertwingly.net
 Ian Hickson (Google) - ian@hixie.ch
 Thomas Broyer - t.broyer@ltgt.net
 Jacques Distler - distler@golem.ph.utexas.edu
 Henri Sivonen - hsivonen@iki.fi
 Adam Barth - abarth@webkit.org
 Eric Seidel - eric@webkit.org
 The Mozilla Foundation (contributions from Henri Sivonen since 2008)
 David Flanagan (Mozilla) - dflanagan@mozilla.com
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/thirdparty/html5lib/README
+++ b/thirdparty/html5lib/README
@ -1,47 +0,0 @@
 html5lib - php flavour
 This is an implementation of the tokenization and tree-building parts
 of the HTML5 specification in PHP.  Potential uses of this library
 can be found in web-scrapers and HTML filters.
 Warning: This is a pre-alpha release, and as such, certain parts of
 this code are not up-to-snuff (e.g. error reporting and performance).
 However, the code is very close to spec and passes 100% of tests
 not related to parse errors.  Nevertheless, expect to have to update
 your code on the next upgrade.
 Usage notes:
    <?php
    require_once '/path/to/HTML5/Parser.php';
    $dom = HTML5_Parser::parse('<html><body>...');
    $nodelist = HTML5_Parser::parseFragment('<b>Boo</b><br>');
    $nodelist = HTML5_Parser::parseFragment('<td>Bar</td>', 'table');
 Documentation:
 HTML5_Parser::parse($text)
    $text  : HTML to parse
    return : DOMDocument of parsed document
 HTML5_Parser::parseFragment($text, $context)
    $text    : HTML to parse
    $context : String name of context element
    return   : DOMNodeList of the parsed fragment's nodes
 Developer notes:
 * To setup unit tests, you need to add a small stub file test-settings.php
  that contains $simpletest_location = 'path/to/simpletest/'; This needs to
  be version 1.1 (or, until that is released, SVN trunk) of SimpleTest.
 * We don't want to ultimately use PHP's DOM because it is not tolerant
  of certain types of errors that HTML 5 allows (for example, an element
  "foo@bar"). But the current implementation uses it, since it's easy.
  Eventually, this html5lib implementation will get a version of SimpleTree;
  and may possibly start using that by default.
    vim: et sw=4 sts=4
--- a/view/Requirements.php
+++ b/view/Requirements.php
@ -660,7 +660,7 @@ class Requirements_Backend {
 			$this->process_combined_files();
 			foreach(array_diff_key($this->javascript,$this->blocked) as $file => $dummy) {
-				$path = $this->path_for_file($file);
+				$path = Convert::raw2xml($this->path_for_file($file));
 				if($path) {
 					$jsRequirements .= "<script type=\"text/javascript\" src=\"$path\"></script>\n";
 				}
@ -677,7 +677,7 @@ class Requirements_Backend {
 			}
 			foreach(array_diff_key($this->css,$this->blocked) as $file => $params) {
-				$path = $this->path_for_file($file);
+				$path = Convert::raw2xml($this->path_for_file($file));
 				if($path) {
 					$media = (isset($params['media']) && !empty($params['media']))
 						? " media=\"{$params['media']}\"" : "";
--- a/view/SSTemplateParser.php
+++ b/view/SSTemplateParser.php
@ -3939,7 +3939,7 @@ class SSTemplateParser extends Parser {
 						$result = $res_671;
 						$this->pos = $pos_671;
 					}
-					if (( $subres = $this->rx( '/./' ) ) !== FALSE) { $result["text"] .= $subres; }
+					if (( $subres = $this->rx( '/(?s)./' ) ) !== FALSE) { $result["text"] .= $subres; }
 					else { $_673 = FALSE; break; }
 					$_673 = TRUE; break;
 				}
--- a/view/SSTemplateParser.php.inc
+++ b/view/SSTemplateParser.php.inc
@ -947,7 +947,7 @@ class SSTemplateParser extends Parser {
 	# This is used to remove template comments
-	Comment: "<%--" (!"--%>" /./)+ "--%>"
+	Comment: "<%--" (!"--%>" /(?s)./)+ "--%>"
 	*/
 	function Comment__construct(&$res) {
 		$res['php'] = '';
--- a/view/SSViewer.php
+++ b/view/SSViewer.php
@ -866,7 +866,7 @@ class SSViewer {
 	 * @param ViewableData $item
 	 * @param SS_Cache $cache Optional cache backend.
 	 *
-	 * @return String Parsed template output.
+	 * @return HTMLText Parsed template output.
 	 */
 	public function process($item, $arguments = null) {
 		SSViewer::$topLevel[] = $item;
@ -934,7 +934,7 @@ class SSViewer {
 			}
 		}
-		return $output;
+		return DBField::create_field('HTMLText', $output, null, array('shortcodes' => false));
 	}
 	/**
--- a/view/ViewableData.php
+++ b/view/ViewableData.php
@ -32,7 +32,7 @@ class ViewableData extends Object implements IteratorAggregate {
 	 *
 	 * @var string
 	 */
-	public static $default_cast = 'HTMLText';
+	public static $default_cast = 'Text';
 	/**
 	 * @var array
@ -318,7 +318,7 @@ class ViewableData extends Object implements IteratorAggregate {
 	 *
 	 * @param string|array|SSViewer $template the template to render into
 	 * @param array $customFields fields to customise() the object with before rendering
-	 * @return string
+	 * @return HTMLText
 	 */
 	public function renderWith($template, $customFields = null) {
 		if(!is_object($template)) {