From c314d0b6595f703f5391ee88ab6a9f265f443ff5 Mon Sep 17 00:00:00 2001 From: Julian Seidenberg Date: Mon, 16 Apr 2012 17:24:48 +1200 Subject: [PATCH] API-CHANGE: i18nTextCollector can now extract the new translatable entities (<%t) from templates and populate them in language tables (uses PEG parser) --- i18n/i18nTextCollector.php | 230 ++++++++++++++++++++------- tests/i18n/i18nTextCollectorTest.php | 62 +++++++- 2 files changed, 232 insertions(+), 60 deletions(-) diff --git a/i18n/i18nTextCollector.php b/i18n/i18nTextCollector.php index db670c397..612719011 100644 --- a/i18n/i18nTextCollector.php +++ b/i18n/i18nTextCollector.php @@ -195,70 +195,134 @@ class i18nTextCollector extends Object { } public function collectFromCode($content, $module) { - $entities = array(); + $entitiesArr = array(); - $tokens = token_get_all(" 0) { //we have at least one match + + //take all the matched _t entities + foreach($matchesArray[1] as $match) { + //replace all commas with backticks (unique character to explode on later) + $replacedMatch = preg_replace('/("|\'|_LOW|_MEDIUM|_HIGH)\s*,\s*([\'"]|"|\'|array|PR)/','$1`$2',$match); //keep array text + + //$replacedMatch = trim($replacedMatch," \"'\n"); //remove starting and ending quotes + $replacedMatch = trim($replacedMatch," \n"); //remove starting and ending spaces and newlines + + $parts = explode('`',$replacedMatch); //cut up the _t call + + $partsWOQuotes = array(); + foreach($parts as $part) { + $part = trim($part,"\n"); //remove spaces and newlines from part + + $firstChar = substr($part,0,1); + if ($firstChar == "'" || $firstChar == '"') { + //remove wrapping quotes + $part = substr($part,1,-1); + + //remove inner concatenation + $part = preg_replace("/$firstChar\\s*\\.\\s*$firstChar/",'',$part); } - - if($inConcat) { - $currentEntity[count($currentEntity)-1] .= $text; - } else { - $currentEntity[] = $text; - } - } - } elseif($inTransFn && $token == '.') { - $inConcat = true; - } elseif($inTransFn && $token == ',') { - $inConcat = false; - } elseif($inTransFn && $token == ')') { - // finalize definition - $inTransFn = false; - $inConcat = false; - $entity = array_shift($currentEntity); - $entities[$entity] = $currentEntity; - $currentEntity = array(); - } - } - - foreach($entities as $entity => $spec) { - // call without master language definition - if(!$spec) { - unset($entities[$entity]); - continue; - } - unset($entities[$entity]); - $entities[$this->normalizeEntity($entity, $module)] = $spec; + $partsWOQuotes[] = $part; //remove starting and ending quotes from inner parts + } + + if ($parts && count($partsWOQuotes) > 0) { + + $entitiesArr = array_merge($entitiesArr, (array)$this->entitySpecFromNewParts($partsWOQuotes)); + } + } } - ksort($entities); - - return $entities; + + ksort($entitiesArr); + + return $entitiesArr; } + + /** + * Test if one string starts with another + */ + protected function startsWith($haystack, $needle) { + $length = strlen($needle); + return (substr($haystack, 0, $length) === $needle); + } + + /** + * Converts a parts array from explode function into an array of entities for the i18n text collector + * @return array + */ + protected function entitySpecFromNewParts($parts, $namespace = null) { + // first thing in the parts array will always be the entity + // split fullname into entity parts + //set defaults + $value = ""; + $prio = null; + $comment = null; + + $entityParts = explode('.', $parts[0]); + if(count($entityParts) > 1) { + // templates don't have a custom namespace + $entity = array_pop($entityParts); + // namespace might contain dots, so we explode + $namespace = implode('.',$entityParts); + } else { + $entity = array_pop($entityParts); + $namespace = $namespace; + } + + //find the array (if found, then we are dealing with the new _t syntax + $newSyntax = false; + $offset = 0; + foreach($parts as $p) { + if ($this->startsWith($p,'array')) { //remove everything after (and including) the array + $newSyntax = true; + $parts = array_splice($parts,0,$offset); + break; + } + $offset++; + } + + //2nd part of array is always "string" + if (isset($parts[1])) $value = $parts[1]; + + + //3rd part can either be priority or context, if old or now syntax is used + if (isset($parts[2])) { + if ($newSyntax) { + $prio = 40; //default priority + $comment = $parts[2]; + } else { + if (stripos($parts[2], 'PR_LOW') !== false || + stripos($parts[2], 'PR_MEDIUM') !== false || + stripos($parts[2], 'PR_HIGH') !== false) { //definitely old syntax + $prio = $parts[2]; + } else { //default to new syntax (3rd position is comment/context + $prio = 40; //default priority + $comment = $parts[2]; + } + } + } + + //if 4th position is set then this is old syntax and it is the context + //it would be array in the new syntax and therefore should have already been spliced off + if (isset($parts[3])) { + $comment = $parts[3]; + $prio = $parts[2]; //3rd position is now definitely priority + } + + return array( + "{$namespace}.{$entity}" => array( + $value, + $prio, + $comment + ) + ); + } + + + public function collectFromTemplate($content, $fileName, $module) { $entities = array(); @@ -276,6 +340,11 @@ class i18nTextCollector extends Object { // @todo Will get massively confused if you include the includer -> infinite loop } + // use parser to extract <%t style translatable entities + $translatables = i18nTextCollector_Parser::GetTranslatables($content); + $entities = array_merge($entities,(array)$translatables); + + // use the old method of getting _t() style translatable entities // Collect in actual template if(preg_match_all('/<%\s*(_t\(.*)%>/ms', $content, $matches)) { foreach($matches as $match) { @@ -516,4 +585,49 @@ class i18nTextCollector_Writer_RailsYaml implements i18nTextCollector_Writer { // TODO Dumper can't handle YAML comments, so the context information is currently discarded return $yamlHandler->dump(array($locale => $entitiesNested), 99); } +} + +/** + * Parser that scans through a template and extracts the parameters to the _t and <%t calls + */ +class i18nTextCollector_Parser extends SSTemplateParser { + + static $entities = array(); + static $currentEntity = array(); + + function Translate__construct(&$res) { + self::$currentEntity = array(null,null,null); //start with empty array + } + + function Translate_Entity(&$res, $sub) { + self::$currentEntity[0] = $sub['text']; //entity + } + + function Translate_Default(&$res, $sub) { + self::$currentEntity[1] = $sub['String']['text']; //value + } + + function Translate_Context(&$res, $sub) { + self::$currentEntity[2] = $sub['String']['text']; //comment + } + + function Translate__finalise(&$res) { + // set the entity name and the value (default), as well as the context (comment) + // priority is no longer used, so that is blank + self::$entities[self::$currentEntity[0]] = array(self::$currentEntity[1],null,self::$currentEntity[2]); + } + + /** + * Parses a template and returns any translatable entities + */ + static function GetTranslatables($template) { + self::$entities = array(); + + // Run the parser and throw away the result + $parser = new i18nTextCollector_Parser($template); + if(substr($template, 0,3) == pack("CCC", 0xef, 0xbb, 0xbf)) $parser->pos = 3; + $parser->match_TopTemplate(); + + return self::$entities; + } } \ No newline at end of file diff --git a/tests/i18n/i18nTextCollectorTest.php b/tests/i18n/i18nTextCollectorTest.php index 14e25cf6c..53f483aae 100644 --- a/tests/i18n/i18nTextCollectorTest.php +++ b/tests/i18n/i18nTextCollectorTest.php @@ -58,7 +58,7 @@ _t( _t( 'Test.CONCATENATED2', -"Line \"4\" and " . +"Line "4" and " . "Line 5"); PHP; $this->assertEquals( @@ -68,7 +68,36 @@ PHP; 'Test.CONCATENATED2' => array("Line \"4\" and Line 5") ) ); - } + } + + function testCollectFromNewTemplateSyntaxUsingParserSubclass() { + $c = new i18nTextCollector(); + + $html = << +<%t i18nTestModule.NEWMETHODSIG "New _t method signature test" %> +<%t i18nTestModule.INJECTIONS_0 "Hello {name} {greeting}. But it is late, {goodbye}" name="Mark" greeting="welcome" goodbye="bye" %> +<%t i18nTestModule.INJECTIONS_1 "Hello {name} {greeting}. But it is late, {goodbye}" name="Paul" greeting="good you are here" goodbye="see you" %> +<%t i18nTestModule.INJECTIONS_2 "Hello {name} {greeting}. But it is late, {goodbye}" is "New context (this should be ignored)" name="Steffen" greeting="willkommen" goodbye="wiedersehen" %> +<%t i18nTestModule.INJECTIONS_3 name="Cat" greeting='meow' goodbye="meow" %> +<%t i18nTestModule.INJECTIONS_4 name=\$absoluteBaseURL greeting=\$get_locale goodbye="global calls" %> +SS; + $c->collectFromTemplate($html, 'mymodule', 'Test'); + + $this->assertEquals( + $c->collectFromTemplate($html, 'mymodule', 'Test'), + array( + 'Test.SINGLEQUOTE' => array('Single Quote',null,null), + 'i18nTestModule.NEWMETHODSIG' => array("New _t method signature test",null,null), + 'i18nTestModule.INJECTIONS_0' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null), + 'i18nTestModule.INJECTIONS_1' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null), + 'i18nTestModule.INJECTIONS_2' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, "New context (this should be ignored)"), + 'i18nTestModule.INJECTIONS_3' => array(null, null, null), + 'i18nTestModule.INJECTIONS_4' => array(null, null, null), + ) + ); + } + function testCollectFromTemplateSimple() { $c = new i18nTextCollector(); @@ -282,6 +311,35 @@ PHP; ); } + /** + * Test extracting entities from the new _t method signature + */ + function testCollectFromCodeNewSignature() { + $c = new i18nTextCollector(); + + $php = <<"Mark", "greeting"=>"welcome", "goodbye"=>"bye")); +_t('i18nTestModule.INJECTIONS2', "Hello {name} {greeting}. But it is late, {goodbye}", array("name"=>"Paul", "greeting"=>"good you are here", "goodbye"=>"see you")); +_t("i18nTestModule.INJECTIONS3", "Hello {name} {greeting}. But it is late, {goodbye}", "New context (this should be ignored)", array("name"=>"Steffen", "greeting"=>"willkommen", "goodbye"=>"wiedersehen")); +_t('i18nTestModule.INJECTIONS4', array("name"=>"Cat", "greeting"=>"meow", "goodbye"=>"meow")); +PHP; + + $collectedTranslatables = $c->collectFromCode($php, 'mymodule'); + + $expectedArray = (array( + 'i18nTestModule.NEWMETHODSIG' => array("New _t method signature test", null, null), + 'i18nTestModule.INJECTIONS1' => array("_DOES_NOT_EXIST", 40, "Hello {name} {greeting}. But it is late, {goodbye}"), + 'i18nTestModule.INJECTIONS2' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null), + 'i18nTestModule.INJECTIONS3' => array("Hello {name} {greeting}. But it is late, {goodbye}", 40, "New context (this should be ignored)"), + 'i18nTestModule.INJECTIONS4' => array(null, null, null), + )); + + ksort($expectedArray); + + $this->assertEquals($collectedTranslatables, $expectedArray); + } + /** * Input for langArrayCodeForEntitySpec() should be suitable for insertion * into single-quoted strings, so needs to be escaped already.