<?php

/**
 * PEG Generator - A PEG Parser for PHP
 *
 * @author Hamish Friedlander / SilverStripe
 *
 * See README.md for documentation
 * 
 */

require 'PHPBuilder.php' ;

class Flags {
	function __construct( $parent = NULL ) {
		$this->parent = $parent ;
		$this->flags = array() ;
	}

	function __set( $k, $v ) {
		$this->flags[$k] = $v ;
		return $v ;
	}

	function __get( $k ) {
		if ( isset( $this->flags[$k] ) ) return $this->flags[$k] ;
		if ( isset( $this->parent ) ) return $this->parent->$k ;
		return NULL ;
	}
}

/**
 * PHPWriter contains several code generation snippets that are used both by the Token and the Rule compiler
 */
class PHPWriter {

	static $varid = 0 ;

	function varid() {
		return '_' . (self::$varid++) ;
	}

	function function_name( $str ) {
		$str = preg_replace( '/-/', '_', $str ?? '' ) ;
		$str = preg_replace( '/\$/', 'DLR', $str ?? '' ) ;
		$str = preg_replace( '/\*/', 'STR', $str ?? '' ) ;
		$str = preg_replace( '/[^\w]+/', '', $str ?? '' ) ;
		return $str ;
	}

	function save($id) {
		return PHPBuilder::build()
			->l(
			'$res'.$id.' = $result;',
			'$pos'.$id.' = $this->pos;'
			);
	}

	function restore( $id, $remove = FALSE ) {
		$code = PHPBuilder::build()
			->l(
			'$result = $res'.$id.';',
			'$this->pos = $pos'.$id.';'
			);

		if ( $remove ) $code->l(
			'unset( $res'.$id.' );',
			'unset( $pos'.$id.' );'
		);

		return $code ;
	}

	function match_fail_conditional( $on, $match = NULL, $fail = NULL ) {
		return PHPBuilder::build()
			->b( 'if (' . $on . ')',
				$match,
				'MATCH'
			)
			->b( 'else',
				$fail,
				'FAIL'
			);
	}
	
	function match_fail_block( $code ) {
		$id = $this->varid() ;

		return PHPBuilder::build()
			->l(
			'$'.$id.' = NULL;'
			)
			->b( 'do',
				$code->replace(array(
					'MBREAK' => '$'.$id.' = TRUE; break;',
					'FBREAK' => '$'.$id.' = FALSE; break;'
				))
			)
			->l(
			'while(0);'
			)
			->b( 'if( $'.$id.' === TRUE )', 'MATCH' )
			->b( 'if( $'.$id.' === FALSE)', 'FAIL'  )
		   ;
	}
}

/**
 * A Token is any portion of a match rule. Tokens are responsible for generating the code to match against them.
 *
 * This base class provides the compile() function, which handles the token modifiers ( ? * + & ! )
 *
 * Each child class should provide the function match_code() which will generate the code to match against that specific token type.
 * In that generated code they should include the lines MATCH or FAIL when a match or a decisive failure occurs. These will
 * be overwritten when they are injected into parent Tokens or Rules. There is no requirement on where MATCH and FAIL can occur.
 * They tokens are also responsible for storing and restoring state when nessecary to handle a non-decisive failure.
 *
 * @author hamish
 *
 */
abstract class Token extends PHPWriter {
	public $optional = FALSE ;
	public $zero_or_more = FALSE ;
	public $one_or_more = FALSE ;
	public $positive_lookahead = FALSE ;
	public $negative_lookahead = FALSE ;
	public $silent = FALSE ;

	public $tag = FALSE ;

	public $type ;
	public $value ;

	function __construct( $type, $value = NULL ) {
		$this->type = $type ;
		$this->value = $value ;
	}

	// abstract protected function match_code() ;

	function compile() {
		$code = $this->match_code($this->value) ;

		$id = $this->varid() ;

		if ( $this->optional ) {
			$code = PHPBuilder::build()
				->l(
				$this->save($id),
				$code->replace( array( 'FAIL' => $this->restore($id,true) ))
				);
		}

		if ( $this->zero_or_more ) {
			$code = PHPBuilder::build()
				->b( 'while (true)',
					$this->save($id),
					$code->replace( array(
						'MATCH' => NULL,
						'FAIL' =>
							$this->restore($id,true)
							->l( 'break;' )
					))
				)
				->l(
				'MATCH'
				);
		}

		if ( $this->one_or_more ) {
			$code = PHPBuilder::build()
				->l(
				'$count = 0;'
				)
				->b( 'while (true)',
					$this->save($id),
					$code->replace( array(
						'MATCH' => NULL,
						'FAIL' =>
							$this->restore($id,true)
							->l( 'break;' )
					)),
					'$count += 1;'
				)
				->b( 'if ($count > 0)', 'MATCH' )
				->b( 'else',            'FAIL' );
		}

		if ( $this->positive_lookahead ) {
			$code = PHPBuilder::build()
				->l(
				$this->save($id),
				$code->replace( array(
					'MATCH' =>
						$this->restore($id)
						->l( 'MATCH' ),
					'FAIL' =>
						$this->restore($id)
						->l( 'FAIL' )
				)));
		}

		if ( $this->negative_lookahead ) {
			$code = PHPBuilder::build()
				->l(
				$this->save($id),
				$code->replace( array(
					'MATCH' =>
						$this->restore($id)
						->l( 'FAIL' ),
					'FAIL' =>
						$this->restore($id)
						->l( 'MATCH' )
				)));
		}

		if ( $this->tag && !($this instanceof TokenRecurse ) ) {
			$code = PHPBuilder::build()
				->l(
				'$stack[] = $result; $result = $this->construct( $matchrule, "'.$this->tag.'" ); ',
				$code->replace(array(
					'MATCH' => PHPBuilder::build()
						->l(
						'$subres = $result; $result = array_pop($stack);',
						'$this->store( $result, $subres, \''.$this->tag.'\' );',
						'MATCH'
						),
					'FAIL' => PHPBuilder::build()
						->l(
						'$result = array_pop($stack);',
						'FAIL'
						)
				)));
		}

		return $code ;
	}
	
}

abstract class TokenTerminal extends Token {
	function set_text( $text ) {
		return $this->silent ? NULL : '$result["text"] .= ' . $text . ';';
	}
		
	protected function match_code( $value ) {
		return $this->match_fail_conditional( '( $subres = $this->'.$this->type.'( '.$value.' ) ) !== FALSE', 
			$this->set_text('$subres')
		);
	}
}

abstract class TokenExpressionable extends TokenTerminal {

	static $expression_rx = '/ \$(\w+) | { \$(\w+) } /x';

	function contains_expression( $value ){
		return preg_match(self::$expression_rx, $value ?? '');
	}

	function expression_replace($matches) {
		return '\'.$this->expression($result, $stack, \'' . (!empty($matches[1]) ? $matches[1] : $matches[2]) . "').'";
	}
	
	function match_code( $value ) {
		$value = preg_replace_callback(self::$expression_rx, array($this, 'expression_replace'), $value ?? '');
		return parent::match_code($value);
	}
}

class TokenLiteral extends TokenExpressionable {
	function __construct( $value ) {
		parent::__construct( 'literal', "'" . substr($value ?? '',1,-1) . "'" );
	}

	function match_code( $value ) {
		// We inline single-character matches for speed
		if ( !$this->contains_expression($value) && strlen( eval( 'return '. $value . ';' ) ) == 1 ) {
			return $this->match_fail_conditional( 'substr($this->string ?? \'\',$this->pos ?? 0,1) == '.$value,
				PHPBuilder::build()->l(
					'$this->pos += 1;',
					$this->set_text($value)
				)
			);
		}
		return parent::match_code($value);
	}
}

class TokenRegex extends TokenExpressionable {
	static function escape( $rx ) {
		$rx = str_replace( "'", "\\'", $rx ?? '' ) ;
		$rx = str_replace( '\\\\', '\\\\\\\\', $rx ?? '' ) ;
		return $rx ;
	}
	
	function __construct( $value ) {
		parent::__construct('rx', self::escape($value));
	}

	function match_code( $value ) {
		return parent::match_code("'{$value}'");
	}
}

class TokenWhitespace extends TokenTerminal {
	function __construct( $optional ) {
		parent::__construct( 'whitespace', $optional ) ;
	}

	/* Call recursion indirectly */
	function match_code( $value ) {
		$code = parent::match_code( '' ) ;
		return $value ? $code->replace( array( 'FAIL' => NULL )) : $code ;
	}
}

class TokenRecurse extends Token {
	function __construct( $value ) {
		parent::__construct( 'recurse', $value ) ;
	}

	function match_function( $value ) {
		return "'".$this->function_name($value)."'";
	}
	
	function match_code( $value ) {
		$function = $this->match_function($value) ;
		$storetag = $this->function_name( $this->tag ? $this->tag : $this->match_function($value) ) ;

		if ( ParserCompiler::$debug ) {
			$debug_header = PHPBuilder::build()
				->l(
				'$indent = str_repeat( " ", $this->depth );',
				'$this->depth += 2;',
				'$sub = ( strlen( $this->string ) - $this->pos > 20 ) ? ( substr( $this->string, $this->pos, 20 ) . "..." ) : substr( $this->string, $this->pos );',
				'$sub = preg_replace( \'/(\r|\n)+/\', " {NL} ", $sub );',
				'print( $indent."Matching against $matcher (".$sub.")\n" );'
				);

			$debug_match = PHPBuilder::build()
				->l(
				'print( $indent."MATCH\n" );',
				'$this->depth -= 2;'
				);

			$debug_fail = PHPBuilder::build()
				->l(
				'print( $indent."FAIL\n" );',
				'$this->depth -= 2;'
				);
		}
		else {
			$debug_header = $debug_match = $debug_fail = NULL ;
		}

		return PHPBuilder::build()->l(
			'$matcher = \'match_\'.'.$function.'; $key = $matcher; $pos = $this->pos;',
			$debug_header,
			'$subres = ( $this->packhas( $key, $pos ) ? $this->packread( $key, $pos ) : $this->packwrite( $key, $pos, $this->$matcher(array_merge($stack, array($result))) ) );',
			$this->match_fail_conditional( '$subres !== FALSE',
				PHPBuilder::build()->l(
					$debug_match,
					$this->tag === FALSE ?
						'$this->store( $result, $subres );' :
						'$this->store( $result, $subres, "'.$storetag.'" );'
				),
				PHPBuilder::build()->l(
					$debug_fail
				)
			));
	}
}

class TokenExpressionedRecurse extends TokenRecurse {
	function match_function( $value ) {
		return '$this->expression($result, $stack, \''.$value.'\')';
	}
}

class TokenSequence extends Token {
	function __construct( $value ) {
		parent::__construct( 'sequence', $value ) ;
	}

	function match_code( $value ) {
		$code = PHPBuilder::build() ;
		foreach( $value as $token ) {
			$code->l(
				$token->compile()->replace(array(
					'MATCH' => NULL,
					'FAIL' => 'FBREAK'
				))
			);
		}
		$code->l( 'MBREAK' );

		return $this->match_fail_block( $code ) ;
	}
}

class TokenOption extends Token {
	function __construct( $opt1, $opt2 ) {
		parent::__construct( 'option', array( $opt1, $opt2 ) ) ;
	}

	function match_code( $value ) {
		$id = $this->varid() ;
		$code = PHPBuilder::build()
			->l(
			$this->save($id)
			) ;

		foreach ( $value as $opt ) {
			$code->l(
				$opt->compile()->replace(array(
					'MATCH' => 'MBREAK',
					'FAIL' => NULL
				)),
				$this->restore($id)
			);
		}
		$code->l( 'FBREAK' ) ;

		return $this->match_fail_block( $code ) ;
	}
}


/**
 * Handles storing of information for an expression that applys to the <i>next</i> token, and deletion of that
 * information after applying
 *
 * @author Hamish Friedlander
 */
class Pending {
	function __construct() {
		$this->what = NULL ;
	}

	function set( $what, $val = TRUE ) {
		$this->what = $what ;
		$this->val = $val ;
	}

	function apply_if_present( $on ) {
		if ( $this->what !== NULL ) {
			$what = $this->what ;
			$on->$what = $this->val ;

			$this->what = NULL ;
		}
	}
}

/**
 * Rule parsing and code generation
 *
 * A rule is the basic unit of a PEG. This parses one rule, and generates a function that will match on a string
 *
 * @author Hamish Friedlander
 */
class Rule extends PHPWriter {

	static $rule_rx = '@
	(?<name> \w+)                         # The name of the rule
	( \s+ extends \s+ (?<extends>\w+) )?  # The extends word
	( \s* \( (?<arguments>.*) \) )?       # Any variable setters
	(
		\s*(?<matchmark>:) |               # Marks the matching rule start
		\s*(?<replacemark>;) |             # Marks the replacing rule start
		\s*$
	)
	(?<rule>[\s\S]*)
	@x';

	static $argument_rx = '@
	( [^=]+ )    # Name
	=            # Seperator
	( [^=,]+ )   # Variable
	(,|$)
	@x';
	
	static $replacement_rx = '@
	( ([^=]|=[^>])+ )    # What to replace
	=>                   # The replacement mark
	( [^,]+ )            # What to replace it with
	(,|$)
	@x';
	
	static $function_rx = '@^\s+function\s+([^\s(]+)\s*(.*)@' ;

	protected $parser;
	protected $lines;
	
	public $name;
	public $extends;
	public $mode;
	public $rule;
	
	function __construct($parser, $lines) {
		$this->parser = $parser;
		$this->lines = $lines;
		
		// Find the first line (if any) that's an attached function definition. Can skip first line (unless this block is malformed)
		for ($i = 1; $i < count($lines ?? []); $i++) {
			if (preg_match(self::$function_rx, $lines[$i] ?? '')) break;
		}
		
		// Then split into the two parts
		$spec = array_slice($lines ?? [], 0, $i);
		$funcs = array_slice($lines ?? [], $i ?? 0);
		
		// Parse out the spec
		$spec = implode("\n", $spec);
		if (!preg_match(self::$rule_rx, $spec ?? '', $specmatch)) user_error('Malformed rule spec ' . $spec, E_USER_ERROR);
		
		$this->name = $specmatch['name'];
		
		if ($specmatch['extends']) {
			$this->extends = $this->parser->rules[$specmatch['extends']];
			if (!$this->extends) user_error('Extended rule '.$specmatch['extends'].' is not defined before being extended', E_USER_ERROR);
		}
		
		$this->arguments = array();
		
		if ($specmatch['arguments']) {
			preg_match_all(self::$argument_rx, $specmatch['arguments'] ?? '', $arguments, PREG_SET_ORDER);
			
			foreach ($arguments as $argument){
				$this->arguments[trim($argument[1])] = trim($argument[2] ?? '');
			}
		}
		
		$this->mode = $specmatch['matchmark'] ? 'rule' : 'replace';
		
		if ($this->mode == 'rule') {
			$this->rule = $specmatch['rule'];
			$this->parse_rule() ;
		}
		else {
			if (!$this->extends) user_error('Replace matcher, but not on an extends rule', E_USER_ERROR);
			
			$this->replacements = array();
			preg_match_all(self::$replacement_rx, $specmatch['rule'] ?? '', $replacements, PREG_SET_ORDER);
			
			$rule = $this->extends->rule;
			
			foreach ($replacements as $replacement) {
				$search = trim($replacement[1] ?? '');
				$replace = trim($replacement[3] ?? ''); if ($replace == "''" || $replace == '""') $replace = "";
				
				$rule = str_replace($search ?? '', ' '.$replace.' ', $rule ?? '');
			}
			
			$this->rule = $rule;
			$this->parse_rule() ;
		}
		
		// Parse out the functions
		
		$this->functions = array() ;

		$active_function = NULL ;

		foreach( $funcs as $line ) {
			/* Handle function definitions */
			if ( preg_match( self::$function_rx, $line ?? '', $func_match, 0 ) ) {
				$active_function = $func_match[1];
				$this->functions[$active_function] = $func_match[2] . PHP_EOL;
			}
			else $this->functions[$active_function] .= $line . PHP_EOL ;
		}
	}

	/* Manual parsing, because we can't bootstrap ourselves yet */
	function parse_rule() {
		$rule = trim( $this->rule ?? '' ) ;

		/* If this is a regex end-token, just mark it and return */
		if ( substr( $rule ?? '', 0, 1 ) == '/' ) {
			$this->parsed = new TokenRegex( $rule ) ;
		}
		else {
			$tokens = array() ;
			$this->tokenize( $rule, $tokens ) ;
			$this->parsed = ( count( $tokens ?? [] ) == 1 ? array_pop( $tokens ) : new TokenSequence( $tokens ) ) ;
		}
		
	}

	static $rx_rx = '{^/(
		((\\\\\\\\)*\\\\/) # Escaped \/, making sure to catch all the \\ first, so that we dont think \\/ is an escaped /
		|
		[^/]               # Anything except /
	)*/}xu' ;

	function tokenize( $str, &$tokens, $o = 0 ) {

		$pending = new Pending() ;

		while ( $o < strlen( $str ?? '' ) ) {
			$sub = substr( $str ?? '', $o ?? 0 ) ;

			/* Absorb white-space */
			if ( preg_match( '/^\s+/', $sub ?? '', $match ) ) {
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle expression labels */
			elseif ( preg_match( '/^(\w*):/', $sub ?? '', $match ) ) {
				$pending->set( 'tag', isset( $match[1] ) ? $match[1] : '' ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle descent token */
			elseif ( preg_match( '/^[\w-]+/', $sub ?? '', $match ) ) {
				$tokens[] = $t = new TokenRecurse( $match[0] ) ; $pending->apply_if_present( $t ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle " quoted literals */
			elseif ( preg_match( '/^"[^"]*"/', $sub ?? '', $match ) ) {
				$tokens[] = $t = new TokenLiteral( $match[0] ) ; $pending->apply_if_present( $t ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle ' quoted literals */
			elseif ( preg_match( "/^'[^']*'/", $sub ?? '', $match ) ) {
				$tokens[] = $t = new TokenLiteral( $match[0] ) ; $pending->apply_if_present( $t ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle regexs */
			elseif ( preg_match( self::$rx_rx, $sub ?? '', $match ) ) {
				$tokens[] = $t = new TokenRegex( $match[0] ) ; $pending->apply_if_present( $t ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle $ call literals */
			elseif ( preg_match( '/^\$(\w+)/', $sub ?? '', $match ) ) {
				$tokens[] = $t = new TokenExpressionedRecurse( $match[1] ) ; $pending->apply_if_present( $t ) ;
				$o += strlen( $match[0] ?? '' ) ;
			}
			/* Handle flags */
			elseif ( preg_match( '/^\@(\w+)/', $sub ?? '', $match ) ) {
				$l = count( $tokens ?? [] ) - 1 ;
				$o += strlen( $match[0] ?? '' ) ;
				user_error( "TODO: Flags not currently supported", E_USER_WARNING ) ;
			}
			/* Handle control tokens */
			else {
				$c = substr( $sub ?? '', 0, 1 ) ;
				$l = count( $tokens ?? [] ) - 1 ;
				$o += 1 ;
				switch( $c ) {
					case '?':
						$tokens[$l]->optional = TRUE ;
						break ;
					case '*':
						$tokens[$l]->zero_or_more = TRUE ;
						break ;
					case '+':
						$tokens[$l]->one_or_more = TRUE ;
						break ;

					case '&':
						$pending->set( 'positive_lookahead' ) ;
						break ;
					case '!':
						$pending->set( 'negative_lookahead' ) ;
						break ;
						
					case '.':
						$pending->set( 'silent' );
						break;

					case '[':
					case ']':
						$tokens[] = new TokenWhitespace( FALSE ) ;
						break ;
					case '<':
					case '>':
						$tokens[] = new TokenWhitespace( TRUE ) ;
						break ;

					case '(':
						$subtokens = array() ;
						$o = $this->tokenize( $str, $subtokens, $o ) ;
						$tokens[] = $t = new TokenSequence( $subtokens ) ; $pending->apply_if_present( $t ) ;
						break ;
					case ')':
						return $o ;

					case '|':
						$option1 = $tokens ;
						$option2 = array() ;
						$o = $this->tokenize( $str, $option2, $o ) ;

						$option1 = (count($option1) == 1) ? $option1[0] : new TokenSequence( $option1 );
						$option2 = (count($option2) == 1) ? $option2[0] : new TokenSequence( $option2 );

						$pending->apply_if_present( $option2 ) ;

						$tokens = array( new TokenOption( $option1, $option2 ) ) ;
						return $o ;

					default:
						user_error( "Can't parser $c - attempting to skip", E_USER_WARNING ) ;
				}
			}
		}

		return $o ;
	}

	/**
	 * Generate the PHP code for a function to match against a string for this rule
	 */
	function compile($indent) {
		$function_name = $this->function_name( $this->name ) ;
		
		// Build the typestack
		$typestack = array(); $class=$this;
		do {
			$typestack[] = $this->function_name($class->name);
		}
		while($class = $class->extends);

		$typestack = "array('" . implode("','", $typestack) . "')";
		
		// Build an array of additional arguments to add to result node (if any)
		if (empty($this->arguments)) {
			$arguments = 'null';
		}
		else {
			$arguments = "array(";
			foreach ($this->arguments as $k=>$v) { $arguments .= "'$k' => '$v'"; }
			$arguments .= ")";
		}
		
		$match = PHPBuilder::build() ;
		
		$match->l("protected \$match_{$function_name}_typestack = $typestack;");

		$match->b( "function match_{$function_name} (\$stack = array())",
			'$matchrule = "'.$function_name.'"; $result = $this->construct($matchrule, $matchrule, '.$arguments.');',
			$this->parsed->compile()->replace(array(
				'MATCH' => 'return $this->finalise($result);',
				'FAIL' => 'return FALSE;'
			))
		);

		$functions = array() ;
		foreach( $this->functions as $name => $function ) {
			$function_name = $this->function_name( preg_match( '/^_/', $name ?? '' ) ? $this->name.$name : $this->name.'_'.$name ) ;
			$functions[] = implode( PHP_EOL, array(
				'function ' . $function_name . ' ' . $function
			));
		}

		// print_r( $match ) ; return '' ;
		return $match->render(NULL, $indent) . PHP_EOL . PHP_EOL . implode( PHP_EOL, $functions ) ;
	}
}

class RuleSet {
	public $rules = array();

	function addRule($indent, $lines, &$out) {
		$rule = new Rule($this, $lines) ;
		$this->rules[$rule->name] = $rule;
		
		$out[] = $indent . '/* ' . $rule->name . ':' . $rule->rule . ' */' . PHP_EOL ;
		$out[] = $rule->compile($indent) ;
		$out[] = PHP_EOL ;
	}
	
	function compile($indent, $rulestr) {
		$indentrx = '@^'.preg_quote($indent ?? '').'@';
		
		$out = array();
		$block = array();
		
		foreach (preg_split('/\r\n|\r|\n/', $rulestr ?? '') as $line) {
			// Ignore blank lines
			if (!trim($line ?? '')) continue;
			// Ignore comments
			if (preg_match('/^[\x20|\t]+#/', $line ?? '')) continue;
			
			// Strip off indent
			if (!empty($indent)) { 
				if (strpos($line ?? '', $indent ?? '') === 0) $line = substr($line ?? '', strlen($indent ?? ''));
				else user_error('Non-blank line with inconsistent index in parser block', E_USER_ERROR);
			}
			
			// Any indented line, add to current set of lines
			if (preg_match('/^\x20|\t/', $line ?? '')) $block[] = $line;
			
			// Any non-indented line marks a new block. Add a rule for the current block, then start a new block
			else {
				if (count($block ?? [])) $this->addRule($indent, $block, $out);
				$block = array($line);
			}
		}
		
		// Any unfinished block add a rule for
		if (count($block ?? [])) $this->addRule($indent, $block, $out);
		
		// And return the compiled version
		return implode( '', $out ) ;
	}
}

class ParserCompiler {

	static $parsers = array();
	
	static $debug = false;

	static $currentClass = null;

	static function create_parser( $match ) {
		/* We allow indenting of the whole rule block, but only to the level of the comment start's indent */
		$indent = $match[1];
		
		/* Get the parser name for this block */
		if     ($class = trim($match[2] ?? '')) self::$currentClass = $class;
		elseif (self::$currentClass)      $class = self::$currentClass;
		else                              $class = self::$currentClass = 'Anonymous Parser';
		
		/* Check for pragmas */
		if (strpos($class ?? '', '!') === 0) {
			switch ($class) {
				case '!silent':
					// NOP - dont output
					return '';
				case '!insert_autogen_warning':
					return $indent . implode(PHP_EOL.$indent, array(
						'/*',
						'WARNING: This file has been machine generated. Do not edit it, or your changes will be overwritten next time it is compiled.',
						'*/'
					)) . PHP_EOL;
				case '!debug':
					self::$debug = true;
					return '';
			}
			
			throw new Exception("Unknown pragma $class encountered when compiling parser");
		}
		
		if (!isset(self::$parsers[$class])) self::$parsers[$class] = new RuleSet();
		
		return self::$parsers[$class]->compile($indent, $match[3]);
	}

	static function compile( $string ) {
		static $rx = '@
			^([\x20\t]*)/\*!\* (?:[\x20\t]*(!?\w*))?   # Start with some indent, a comment with the special marker, then an optional name
			((?:[^*]|\*[^/])*)                         # Any amount of "a character that isnt a star, or a star not followed by a /
			\*/                                        # The comment end
		@mx';

		return preg_replace_callback( $rx ?? '', array( 'ParserCompiler', 'create_parser' ), $string ?? '' ) ;
	}

	static function cli( $args ) {
		if ( count( $args ?? [] ) == 1 ) {
			print "Parser Compiler: A compiler for PEG parsers in PHP \n" ;
			print "(C) 2009 SilverStripe. See COPYING for redistribution rights. \n" ;
			print "\n" ;
			print "Usage: {$args[0]} infile [ outfile ]\n" ;
			print "\n" ;
		}
		else {
			$fname = ( $args[1] == '-' ? 'php://stdin' : $args[1] ) ;
			$string = file_get_contents( $fname ?? '' ) ;
			$string = self::compile( $string ) ;

			if ( !empty( $args[2] ) && $args[2] != '-' ) {
				file_put_contents( $args[2] ?? '', $string ) ;
			}
			else {
				print $string ;
			}
		}
	}
}