From 5e8597bf0aeb77a9cf51e2d81445938bde247e90 Mon Sep 17 00:00:00 2001 From: Sam Minnee Date: Tue, 8 Apr 2008 06:17:58 +0000 Subject: [PATCH] Added TokenisedRegularExpression for accurate parsing of class files for the manifest. To make it efficient, the parse results of each file are now cached. git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@52320 467b73ca-7a2a-4603-9d3b-597d59a354a9 --- core/ManifestBuilder.php | 128 ++++++++++++++--------- core/TokenisedRegularExpression.php | 98 +++++++++++++++++ tests/ManifestBuilderTest.fixture.inc | 15 ++- tests/ManifestBuilderTest.php | 12 ++- tests/TokenisedRegularExpressionTest.php | 92 ++++++++++++++++ 5 files changed, 293 insertions(+), 52 deletions(-) create mode 100644 core/TokenisedRegularExpression.php create mode 100644 tests/TokenisedRegularExpressionTest.php diff --git a/core/ManifestBuilder.php b/core/ManifestBuilder.php index 3f8995dba..54d1b77aa 100644 --- a/core/ManifestBuilder.php +++ b/core/ManifestBuilder.php @@ -377,61 +377,93 @@ class ManifestBuilder { $class=""; if(!$file) die("Couldn't open $filename
"); - - // Remove comments from $file so that we don't make use of a class-def inside a comment - $file = preg_replace('/\/\/.*([\n\r])/','$1', $file); - $file = preg_replace('/\/\*.*\*\//Us','', $file); - - // Remove strings from $file so that we don't make use of a class-def inside a strin - $file = str_replace(array("\\'",'\\"'), "{! ESCAPED QUOTE !}", $file); - $file = preg_replace("/'[^']*'/s",'', $file); - $file = preg_replace('/"[^"]*"/s','', $file); - // Remove heredoc strings from $file so that we don't make use of a class-def inside a strin - if(preg_match_all('/<<<(.*)/', $file, $heredocs)) { - foreach($heredocs[1] as $code) { - $file = preg_replace('/<<<' . $code . '\n.*\n' . $code . '[\n;]/s', '', $file); + // We cache the parse results of each file, since only a few files will have changed between flushings + // And, although it's accurate, TokenisedRegularExpression isn't particularly fast + $parseCacheFile = TEMP_FOLDER . "/manifestClassParse-" . str_replace(array("/",":", "\\"),"_", realpath($filename)); + if(!file_exists($parseCacheFile) || filemtime($parseCacheFile) < filemtime($filename)) { + $tokens = token_get_all($file); + $classes = self::getClassDefParser()->findAll($tokens); + $interfaces = self::getInterfaceDefParser()->findAll($tokens); + + $cacheContent = ' $interfaces, - "extends" => $extends, - "file" => $filename - ); + foreach($classes as $class) { + $className = $class['className']; + unset($class['className']); + $class['file'] = $filename; + if(!isset($class['extends'])) $class['extends'] = null; + + if($class['extends']) self::$extendsArray[$class['extends']][$className] = $className; + if(isset($class['interfaces'])) foreach($class['interfaces'] as $interface) { + self::$implementsArray[$interface][$className] = $className; } + + self::$classArray[$className] = $class; + } - $interfaces = array(); - $size = preg_match_all('/interface (.*){/', $file, $interfaces); - - for($i=0;$i<$size;$i++) { - $class = trim($interfaces[1][$i]); - self::$classArray[$class] = array( - "interfaces"=>array(), - "extends" => "", - "isinterface"=>true - ); - } + foreach($interfaces as $interface) { + $className = $interface['interfaceName']; + unset($interface['interfaceName']); + $interface['file'] = $filename; + if(!isset($interface['extends'])) $interface['extends'] = null; + self::$classArray[$className] = $interface; + } } + + /** + * Returns a {@link TokenisedRegularExpression} object that will parse class definitions + * @return TokenisedRegularExpression + */ + public static function getClassDefParser() { + return new TokenisedRegularExpression(array( + 0 => T_CLASS, + 1 => T_WHITESPACE, + 2 => array(T_STRING, 'can_jump_to' => array(7, 14), 'save_to' => 'className'), + 3 => T_WHITESPACE, + 4 => T_EXTENDS, + 5 => T_WHITESPACE, + 6 => array(T_STRING, 'save_to' => 'extends', 'can_jump_to' => 14), + 7 => T_WHITESPACE, + 8 => T_IMPLEMENTS, + 9 => T_WHITESPACE, + 10 => array(T_STRING, 'can_jump_to' => 14, 'save_to' => 'interfaces[]'), + 11 => array(T_WHITESPACE, 'optional' => true), + 12 => array(',', 'can_jump_to' => 10), + 13 => array(T_WHITESPACE, 'can_jump_to' => 10), + 14 => array(T_WHITESPACE, 'optional' => true), + 15 => '{', + )); + } + + /** + * Returns a {@link TokenisedRegularExpression} object that will parse class definitions + * @return TokenisedRegularExpression + */ + public static function getInterfaceDefParser() { + return new TokenisedRegularExpression(array( + 0 => T_INTERFACE, + 1 => T_WHITESPACE, + 2 => array(T_STRING, 'can_jump_to' => 7, 'save_to' => 'interfaceName'), + 3 => T_WHITESPACE, + 4 => T_EXTENDS, + 5 => T_WHITESPACE, + 6 => array(T_STRING, 'save_to' => 'extends'), + 7 => array(T_WHITESPACE, 'optional' => true), + 8 => '{', + )); + } + /** * Moves through self::$classArray and creates an array containing parent data diff --git a/core/TokenisedRegularExpression.php b/core/TokenisedRegularExpression.php new file mode 100644 index 000000000..7e3d6b2e7 --- /dev/null +++ b/core/TokenisedRegularExpression.php @@ -0,0 +1,98 @@ +expression = $expression; + } + + function findAll($tokens) { + $tokenTypes = array(); + foreach($tokens as $i => $token) { + if(is_array($token)) { + $tokenTypes[$i] = $token[0]; + } else { + $tokenTypes[$i] = $token; + // Pre-process string tokens for matchFrom() + $tokens[$i] = array($token, $token); + } + } + + $startKeys = array_keys($tokenTypes, $this->expression[0]); + $allMatches = array(); + + foreach($startKeys as $startKey) { + $matches = array(); + if($this->matchFrom($startKey, 0, $tokens, $matches)) { + $allMatches[] = $matches; + } + } + return $allMatches; + } + + function matchFrom($tokenPos, $expressionPos, &$tokens, &$matches) { + $expressionRule = $this->expression[$expressionPos]; + $expectation = is_array($expressionRule) ? $expressionRule[0] : $expressionRule; + if(!is_array($expressionRule)) $expressionRule = array(); + + if($expectation == $tokens[$tokenPos][0]) { + if(isset($expressionRule['save_to'])) { + // Append to an array + if(substr($expressionRule['save_to'],-2) == '[]') $matches[substr($expressionRule['save_to'],0,-2)][] = $tokens[$tokenPos][1]; + // Regular variable setting + else $matches[$expressionRule['save_to']] = $tokens[$tokenPos][1]; + } + + // End of the expression + if(!isset($this->expression[$expressionPos+1])) { + return true; + + // Process next step as normal + } else if($this->matchFrom($tokenPos+1, $expressionPos+1, $tokens, $matches)) { + return true; + + // This step is optional + } else if(isset($expressionRule['optional']) && $this->matchFrom($tokenPos, $expressionPos+1, $tokens, $matches)) { + return true; + + // Process jumps + } else if(isset($expressionRule['can_jump_to'])) { + if(is_array($expressionRule['can_jump_to'])) foreach($expressionRule['can_jump_to'] as $canJumpTo) { + // can_jump_to & optional both set + if(isset($expressionRule['optional']) && $this->matchFrom($tokenPos, $canJumpTo, $tokens, $matches)) { + return true; + } + // can_jump_to set (optional may or may not be set) + if($this->matchFrom($tokenPos+1, $canJumpTo, $tokens, $matches)) { + return true; + } + + } else { + // can_jump_to & optional both set + if(isset($expressionRule['optional']) && $this->matchFrom($tokenPos, $expressionRule['can_jump_to'], $tokens, $matches)) { + return true; + } + // can_jump_to set (optional may or may not be set) + if($this->matchFrom($tokenPos+1, $expressionRule['can_jump_to'], $tokens, $matches)) { + return true; + } + } + } + + } else if(isset($expressionRule['optional'])) { + if(isset($this->expression[$expressionPos+1])) return $this->matchFrom($tokenPos, $expressionPos+1, $tokens, $matches); + else return true; + } + + return false; + + } +} \ No newline at end of file diff --git a/tests/ManifestBuilderTest.fixture.inc b/tests/ManifestBuilderTest.fixture.inc index 3d602809f..5ee20358b 100644 --- a/tests/ManifestBuilderTest.fixture.inc +++ b/tests/ManifestBuilderTest.fixture.inc @@ -26,7 +26,7 @@ class MyClass_Other extends DataObject implements Something { } -class MyClass_Final extends DataObject implements Something, Else { +class MyClass_Final extends DataObject implements Something, OtherClass { } @@ -63,6 +63,19 @@ MYCODE; I've included \"an escaped quote\" in this to ensure that it can handle that. " +/* let's define a class between two strings to confirm that it gets discovered */ +class MyClass_ClassBetweenTwoStrings extends DataObject { + + +} + + +\$string4 = "class MyClass_InDoubleQuoteString extends DataObject { +} + +I've included \"an escaped quote\" in this to ensure that it can handle that. +" + ?> PHP , diff --git a/tests/ManifestBuilderTest.php b/tests/ManifestBuilderTest.php index c39ae7c88..448a34e40 100644 --- a/tests/ManifestBuilderTest.php +++ b/tests/ManifestBuilderTest.php @@ -12,6 +12,7 @@ class ManifestBuilderTest extends SapphireTest { $this->assertContains('MyClass', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists'])); $this->assertContains('MyClass_Other', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists'])); $this->assertContains('MyClass_Final', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists'])); + $this->assertContains('MyClass_ClassBetweenTwoStrings', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists'])); // Check aspects of PHP file $manifest = ManifestBuilder::generate_php_file($manifestInfo); @@ -68,9 +69,12 @@ class ManifestBuilderTest extends SapphireTest { } - protected $originalClassManifest, $originalProject; + protected $originalClassManifest, $originalProject, $originalAllClasses; function setUp() { + // Trick the auto-loder into loading this class before we muck with the manifest + new TokenisedRegularExpression(null); + include('tests/ManifestBuilderTest.fixture.inc'); // Build the fixture specified above @@ -100,15 +104,17 @@ class ManifestBuilderTest extends SapphireTest { } } - global $_CLASS_MANIFEST, $project; + global $_CLASS_MANIFEST, $_ALL_CLASSES, $project; + $this->originalAllClasses = $_ALL_CLASSES; $this->originalClassManifest = $_CLASS_MANIFEST; $this->originalProject = $project; } function tearDown() { - global $_CLASS_MANIFEST, $project; + global $_CLASS_MANIFEST, $_ALL_CLASSES, $project; $project = $this->originalProject; $_CLASS_MANIFEST = $this->originalClassManifest; + $_ALL_CLASSES = $this->originalAllClasses; // Kill the folder after we're done $baseFolder = TEMP_FOLDER . '/manifest-test/'; diff --git a/tests/TokenisedRegularExpressionTest.php b/tests/TokenisedRegularExpressionTest.php new file mode 100644 index 000000000..5d583c584 --- /dev/null +++ b/tests/TokenisedRegularExpressionTest.php @@ -0,0 +1,92 @@ + +PHP +); + } + + function testClassDefParser() { + $parser = ManifestBuilder::getClassDefParser(); + + $tokens = $this->getTokens(); + + $matches = $parser->findAll($tokens); + $classes = array(); + if($matches) foreach($matches as $match) $classes[$match['className']] = $match; + + $this->assertArrayHasKey('ClassA', $classes); + $this->assertArrayHasKey('ClassB', $classes); + + $this->assertArrayHasKey('ClassC', $classes); + $this->assertEquals('ParentClassC', $classes['ClassC']['extends']); + + $this->assertArrayHasKey('ClassD', $classes); + $this->assertEquals('ParentClassD', $classes['ClassD']['extends']); + $this->assertContains('InterfaceA', $classes['ClassD']['interfaces']); + + $this->assertArrayHasKey('ClassE', $classes); + $this->assertEquals('ParentClassE', $classes['ClassE']['extends']); + $this->assertContains('InterfaceA', $classes['ClassE']['interfaces']); + $this->assertContains('InterfaceB', $classes['ClassE']['interfaces']); + + $this->assertArrayHasKey('ClassF', $classes); + $this->assertEquals('ParentClassF', $classes['ClassF']['extends']); + $this->assertContains('InterfaceA', $classes['ClassF']['interfaces']); + $this->assertContains('InterfaceB', $classes['ClassF']['interfaces']); + } + + function testInterfaceDefParser() { + $parser = ManifestBuilder::getInterfaceDefParser(); + + $tokens = $this->getTokens(); + + $matches = $parser->findAll($tokens); + $interfaces = array(); + if($matches) foreach($matches as $match) $interfaces[$match['interfaceName']] = $match; + + $this->assertArrayHasKey('InterfaceA', $interfaces); + + $this->assertArrayHasKey('InterfaceB', $interfaces); + $this->assertEquals('Something', $interfaces['InterfaceB']['extends']); + } +} \ No newline at end of file