Added TokenisedRegularExpression for accurate parsing of class files for the manifest. To make it efficient, the parse results of each file are now cached.

git-svn-id: svn://svn.silverstripe.com/silverstripe/open/modules/sapphire/trunk@52320 467b73ca-7a2a-4603-9d3b-597d59a354a9
This commit is contained in:
Sam Minnee 2008-04-08 06:17:58 +00:00
parent 412d76420d
commit 5e8597bf0a
5 changed files with 293 additions and 52 deletions

View File

@ -377,61 +377,93 @@ class ManifestBuilder {
$class="";
if(!$file) die("Couldn't open $filename<br />");
// Remove comments from $file so that we don't make use of a class-def inside a comment
$file = preg_replace('/\/\/.*([\n\r])/','$1', $file);
$file = preg_replace('/\/\*.*\*\//Us','', $file);
// Remove strings from $file so that we don't make use of a class-def inside a strin
$file = str_replace(array("\\'",'\\"'), "{! ESCAPED QUOTE !}", $file);
$file = preg_replace("/'[^']*'/s",'', $file);
$file = preg_replace('/"[^"]*"/s','', $file);
// Remove heredoc strings from $file so that we don't make use of a class-def inside a strin
if(preg_match_all('/<<<(.*)/', $file, $heredocs)) {
foreach($heredocs[1] as $code) {
$file = preg_replace('/<<<' . $code . '\n.*\n' . $code . '[\n;]/s', '', $file);
// We cache the parse results of each file, since only a few files will have changed between flushings
// And, although it's accurate, TokenisedRegularExpression isn't particularly fast
$parseCacheFile = TEMP_FOLDER . "/manifestClassParse-" . str_replace(array("/",":", "\\"),"_", realpath($filename));
if(!file_exists($parseCacheFile) || filemtime($parseCacheFile) < filemtime($filename)) {
$tokens = token_get_all($file);
$classes = self::getClassDefParser()->findAll($tokens);
$interfaces = self::getInterfaceDefParser()->findAll($tokens);
$cacheContent = '<?php
$classes = ' . var_export($classes,true) . ';
$interfaces = ' . var_export($interfaces,true) . ';';
if($fh = fopen($parseCacheFile,'w')) {
fwrite($fh, $cacheContent);
fclose($fh);
}
} else {
include($parseCacheFile);
}
$classes = array();
$size = preg_match_all('/class (.*)[ \n]*{/m', $file, $classes);
for($i=0; $i < $size; $i++) {
//we have a class
$args = split("implements", $classes[1][$i]);
$implements = isset($args[1]) ? $args[1] : null;
$interfaces = explode(",", trim($implements));
$args = split("extends", $args[0]);
$extends = trim(isset($args[1]) ? $args[1] : null);
$class = trim($args[0]);
if($extends) self::$extendsArray[trim($extends)][$class] = $class;
foreach($interfaces as $interface) {
self::$implementsArray[trim($interface)][$class] = $class;
}
self::$classArray[$class] = array(
"interfaces" => $interfaces,
"extends" => $extends,
"file" => $filename
);
foreach($classes as $class) {
$className = $class['className'];
unset($class['className']);
$class['file'] = $filename;
if(!isset($class['extends'])) $class['extends'] = null;
if($class['extends']) self::$extendsArray[$class['extends']][$className] = $className;
if(isset($class['interfaces'])) foreach($class['interfaces'] as $interface) {
self::$implementsArray[$interface][$className] = $className;
}
self::$classArray[$className] = $class;
}
$interfaces = array();
$size = preg_match_all('/interface (.*){/', $file, $interfaces);
for($i=0;$i<$size;$i++) {
$class = trim($interfaces[1][$i]);
self::$classArray[$class] = array(
"interfaces"=>array(),
"extends" => "",
"isinterface"=>true
);
}
foreach($interfaces as $interface) {
$className = $interface['interfaceName'];
unset($interface['interfaceName']);
$interface['file'] = $filename;
if(!isset($interface['extends'])) $interface['extends'] = null;
self::$classArray[$className] = $interface;
}
}
/**
 * Builds the token pattern that recognises a class declaration header, from the
 * "class" keyword up to the opening brace.
 *
 * The rules save the class name under 'className', the parent class (when present)
 * under 'extends', and each implemented interface into the 'interfaces' list.
 * The numeric keys are spelled out because the 'can_jump_to' options refer to them.
 *
 * @return TokenisedRegularExpression
 */
public static function getClassDefParser() {
	$rules = array(
		// "class <name>" - from the name we may skip straight to "implements" (7) or the brace (14)
		0 => T_CLASS,
		1 => T_WHITESPACE,
		2 => array(T_STRING, 'save_to' => 'className', 'can_jump_to' => array(7, 14)),

		// " extends <parent>"
		3 => T_WHITESPACE,
		4 => T_EXTENDS,
		5 => T_WHITESPACE,
		6 => array(T_STRING, 'save_to' => 'extends', 'can_jump_to' => 14),

		// " implements <interface>[, <interface>...]"
		7 => T_WHITESPACE,
		8 => T_IMPLEMENTS,
		9 => T_WHITESPACE,
		10 => array(T_STRING, 'save_to' => 'interfaces[]', 'can_jump_to' => 14),
		11 => array(T_WHITESPACE, 'optional' => true),
		12 => array(',', 'can_jump_to' => 10),
		13 => array(T_WHITESPACE, 'can_jump_to' => 10),

		// Optional whitespace, then the opening brace of the class body
		14 => array(T_WHITESPACE, 'optional' => true),
		15 => '{',
	);

	return new TokenisedRegularExpression($rules);
}
/**
 * Builds the token pattern that recognises an interface declaration header, from the
 * "interface" keyword up to the opening brace.
 *
 * The rules save the interface name under 'interfaceName' and, when an "extends"
 * clause is present, the single extended name under 'extends'.
 * The numeric keys are spelled out because the 'can_jump_to' option refers to them.
 *
 * @return TokenisedRegularExpression
 */
public static function getInterfaceDefParser() {
	$rules = array(
		// "interface <name>" - from the name we may skip the extends clause and go to rule 7
		0 => T_INTERFACE,
		1 => T_WHITESPACE,
		2 => array(T_STRING, 'save_to' => 'interfaceName', 'can_jump_to' => 7),

		// " extends <parent>"
		3 => T_WHITESPACE,
		4 => T_EXTENDS,
		5 => T_WHITESPACE,
		6 => array(T_STRING, 'save_to' => 'extends'),

		// Optional whitespace, then the opening brace of the interface body
		7 => array(T_WHITESPACE, 'optional' => true),
		8 => '{',
	);

	return new TokenisedRegularExpression($rules);
}
/**
* Moves through self::$classArray and creates an array containing parent data

View File

@ -0,0 +1,98 @@
<?php
/**
 * A tokenised regular expression is a parser, similar to a regular expression, that acts on tokens rather than characters.
 * This is a crucial component of the ManifestBuilder.
 *
 * The expression is an ordered list of rules. A rule is either a bare expectation (a T_* token type, or a
 * single-character token such as '{') or an array whose element 0 is the expectation, plus any of these options:
 *  - 'save_to'     => key under which the matched token's text is stored; a trailing '[]' appends to a list
 *  - 'optional'    => when set, the rule may be skipped without consuming a token
 *  - 'can_jump_to' => a rule index, or an array of rule indices, tried when continuing with the next rule fails
 */
class TokenisedRegularExpression extends Object {
	/**
	 * The regular expression definition
	 */
	protected $expression;

	/**
	 * @param array $expression The list of rules, as described on the class
	 */
	public function __construct($expression) {
		$this->expression = $expression;
	}

	/**
	 * Finds every place in the token stream where the expression matches.
	 *
	 * @param array $tokens Tokens in the format returned by token_get_all()
	 * @return array One entry per match; each entry is the map of values captured through 'save_to'
	 */
	public function findAll($tokens) {
		// Nothing can match without at least one rule
		if(!is_array($this->expression) || !isset($this->expression[0])) return array();

		// The first rule may be written either as a bare expectation or in array form
		$firstRule = $this->expression[0];
		$startExpectation = is_array($firstRule) ? $firstRule[0] : $firstRule;

		$tokenTypes = array();
		foreach($tokens as $i => $token) {
			if(is_array($token)) {
				$tokenTypes[$i] = $token[0];
			} else {
				$tokenTypes[$i] = $token;
				// Pre-process string tokens for matchFrom(), so every token is array(type, text)
				$tokens[$i] = array($token, $token);
			}
		}

		// Only positions holding the first expected token type are worth trying.
		// Strict search keeps integer token ids and single-character tokens apart.
		$allMatches = array();
		foreach(array_keys($tokenTypes, $startExpectation, true) as $startKey) {
			$matches = array();
			if($this->matchFrom($startKey, 0, $tokens, $matches)) {
				$allMatches[] = $matches;
			}
		}

		return $allMatches;
	}

	/**
	 * Tries to match the expression, starting at the given rule, against the tokens starting at the given position.
	 *
	 * When this returns false, $matches is left exactly as it was passed in, so values captured by a branch
	 * that did not work out never end up in the final result.
	 *
	 * @param int $tokenPos Index into $tokens of the token to examine
	 * @param int $expressionPos Index of the rule to apply
	 * @param array $tokens Tokens, each in array(type, text) form as prepared by findAll()
	 * @param array $matches Captured values; filled in as 'save_to' rules match
	 * @return boolean True if the remainder of the expression matched
	 */
	public function matchFrom($tokenPos, $expressionPos, &$tokens, &$matches) {
		$expressionRule = $this->expression[$expressionPos];
		if(is_array($expressionRule)) {
			$expectation = $expressionRule[0];
		} else {
			$expectation = $expressionRule;
			$expressionRule = array();
		}

		$isOptional = isset($expressionRule['optional']);
		$hasNextRule = isset($this->expression[$expressionPos+1]);

		// Running off the end of the token stream counts as "no token here" rather than an error
		$tokenType = isset($tokens[$tokenPos]) ? $tokens[$tokenPos][0] : null;

		// Captures as they were on entry; restored whenever this rule is skipped or fails
		$matchesOnEntry = $matches;

		if($tokenType !== null && $expectation === $tokenType) {
			if(isset($expressionRule['save_to'])) {
				$saveTo = $expressionRule['save_to'];
				$tokenText = $tokens[$tokenPos][1];

				// Append to an array
				if(substr($saveTo, -2) == '[]') $matches[substr($saveTo, 0, -2)][] = $tokenText;
				// Regular variable setting
				else $matches[$saveTo] = $tokenText;
			}
			// Captures including the value saved by this rule, used by every branch that consumes the token
			$matchesWithCapture = $matches;

			// End of the expression
			if(!$hasNextRule) return true;

			// Process next step as normal
			if($this->matchFrom($tokenPos+1, $expressionPos+1, $tokens, $matches)) return true;

			// This step is optional: retry the next rule on the same token, without this rule's capture
			if($isOptional) {
				$matches = $matchesOnEntry;
				if($this->matchFrom($tokenPos, $expressionPos+1, $tokens, $matches)) return true;
			}

			// Process jumps; a single index and a list of indices are handled the same way
			if(isset($expressionRule['can_jump_to'])) {
				foreach((array)$expressionRule['can_jump_to'] as $canJumpTo) {
					// can_jump_to & optional both set: jump without consuming the token
					if($isOptional) {
						$matches = $matchesOnEntry;
						if($this->matchFrom($tokenPos, $canJumpTo, $tokens, $matches)) return true;
					}

					// can_jump_to set (optional may or may not be set): consume the token, then jump
					$matches = $matchesWithCapture;
					if($this->matchFrom($tokenPos+1, $canJumpTo, $tokens, $matches)) return true;
				}
			}

		} else if($isOptional) {
			// The token is not what this rule expects, but the rule may be skipped
			if(!$hasNextRule) return true;
			if($this->matchFrom($tokenPos, $expressionPos+1, $tokens, $matches)) return true;
		}

		// No branch succeeded: discard anything captured along the way
		$matches = $matchesOnEntry;
		return false;
	}
}

View File

@ -26,7 +26,7 @@ class MyClass_Other extends DataObject implements Something {
}
class MyClass_Final extends DataObject implements Something, Else {
class MyClass_Final extends DataObject implements Something, OtherClass {
}
@ -63,6 +63,19 @@ MYCODE;
I've included \"an escaped quote\" in this to ensure that it can handle that.
"
/* let's define a class between two strings to confirm that it gets discovered */
class MyClass_ClassBetweenTwoStrings extends DataObject {
}
\$string4 = "class MyClass_InDoubleQuoteString extends DataObject {
}
I've included \"an escaped quote\" in this to ensure that it can handle that.
"
?>
PHP
,

View File

@ -12,6 +12,7 @@ class ManifestBuilderTest extends SapphireTest {
$this->assertContains('MyClass', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists']));
$this->assertContains('MyClass_Other', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists']));
$this->assertContains('MyClass_Final', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists']));
$this->assertContains('MyClass_ClassBetweenTwoStrings', array_keys($manifestInfo['globals']['_ALL_CLASSES']['exists']));
// Check aspects of PHP file
$manifest = ManifestBuilder::generate_php_file($manifestInfo);
@ -68,9 +69,12 @@ class ManifestBuilderTest extends SapphireTest {
}
protected $originalClassManifest, $originalProject;
protected $originalClassManifest, $originalProject, $originalAllClasses;
function setUp() {
// Trick the auto-loader into loading this class before we muck with the manifest
new TokenisedRegularExpression(null);
include('tests/ManifestBuilderTest.fixture.inc');
// Build the fixture specified above
@ -100,15 +104,17 @@ class ManifestBuilderTest extends SapphireTest {
}
}
global $_CLASS_MANIFEST, $project;
global $_CLASS_MANIFEST, $_ALL_CLASSES, $project;
$this->originalAllClasses = $_ALL_CLASSES;
$this->originalClassManifest = $_CLASS_MANIFEST;
$this->originalProject = $project;
}
function tearDown() {
global $_CLASS_MANIFEST, $project;
global $_CLASS_MANIFEST, $_ALL_CLASSES, $project;
$project = $this->originalProject;
$_CLASS_MANIFEST = $this->originalClassManifest;
$_ALL_CLASSES = $this->originalAllClasses;
// Kill the folder after we're done
$baseFolder = TEMP_FOLDER . '/manifest-test/';

View File

@ -0,0 +1,92 @@
<?php
/**
 * Checks the class and interface definition parsers provided by ManifestBuilder
 * against a small tokenised PHP fixture.
 */
class TokenisedRegularExpressionTest extends SapphireTest {
	/**
	 * Tokenises a fixture containing classes and interfaces declared with a variety of
	 * extends / implements clauses and whitespace layouts.
	 *
	 * @return array Tokens as returned by token_get_all()
	 */
	function getTokens() {
		$fixture = <<<PHP
<?php
class ClassA {
}
class ClassB{
}
class ClassC extends ParentClassC {
}
class ClassD extends ParentClassD
implements InterfaceA {
}
interface InterfaceA {
}
interface InterfaceB extends Something{
}
class ClassE extends ParentClassE
implements InterfaceA,InterfaceB {
}
class ClassF extends ParentClassF
implements InterfaceA, InterfaceB {
}
?>
PHP;
		return token_get_all($fixture);
	}

	/**
	 * Re-keys a list of parser matches by the value each match holds under $nameKey.
	 *
	 * @param array $matches Result of TokenisedRegularExpression::findAll()
	 * @param string $nameKey Capture to use as the array key
	 * @return array
	 */
	private function indexMatches($matches, $nameKey) {
		$indexed = array();
		if($matches) {
			foreach($matches as $match) {
				$indexed[$match[$nameKey]] = $match;
			}
		}
		return $indexed;
	}

	function testClassDefParser() {
		$found = ManifestBuilder::getClassDefParser()->findAll($this->getTokens());
		$classes = $this->indexMatches($found, 'className');

		// class name => array(expected parent or null, expected interfaces)
		$expectations = array(
			'ClassA' => array(null, array()),
			'ClassB' => array(null, array()),
			'ClassC' => array('ParentClassC', array()),
			'ClassD' => array('ParentClassD', array('InterfaceA')),
			'ClassE' => array('ParentClassE', array('InterfaceA', 'InterfaceB')),
			'ClassF' => array('ParentClassF', array('InterfaceA', 'InterfaceB')),
		);

		foreach($expectations as $name => $expected) {
			$parent = $expected[0];
			$implemented = $expected[1];

			$this->assertArrayHasKey($name, $classes);
			if($parent !== null) {
				$this->assertEquals($parent, $classes[$name]['extends']);
			}
			foreach($implemented as $interfaceName) {
				$this->assertContains($interfaceName, $classes[$name]['interfaces']);
			}
		}
	}

	function testInterfaceDefParser() {
		$found = ManifestBuilder::getInterfaceDefParser()->findAll($this->getTokens());
		$interfaces = $this->indexMatches($found, 'interfaceName');

		foreach(array('InterfaceA', 'InterfaceB') as $name) {
			$this->assertArrayHasKey($name, $interfaces);
		}
		$this->assertEquals('Something', $interfaces['InterfaceB']['extends']);
	}
}