# BasicTypes: the lexical building blocks of the grammar -- character classes,
# identifiers, literals, keywords and punctuation tokens.
#
# Convention used throughout: a token rule consumes any whitespace that
# follows it (`space*`), so rules built on top of these tokens do not have to
# skip whitespace themselves.
grammar BasicTypes

  # Character classes, written as POSIX bracket expressions as listed in the
  # Ruby Regexp documentation.
  rule lower  /[[:lower:]]/  end   # Lowercase alphabetical character
  rule upper  /[[:upper:]]/  end   # Uppercase alphabetical character
  rule alnum  /[[:alnum:]]/  end   # Alphabetic and numeric character
  rule alpha  /[[:alpha:]]/  end   # Alphabetic character
  rule blank  /[[:blank:]]/  end   # Space or tab
  rule space  /[[:space:]]/  end   # Whitespace character ([:blank:], newline, carriage return, etc.)
  rule digit  /[[:digit:]]/  end   # Digit
  rule graph  /[[:graph:]]/  end   # Non-blank character (excludes spaces, control characters, and similar)
  rule print  /[[:print:]]/  end   # Like [:graph:], but includes the space character
  rule xdigit /[[:xdigit:]]/ end   # Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)

  # A whitespace character that is not a space or a tab.
  # TODO: define in regex terms for UTF.
  rule linebreak
    (!blank space)
  end

  # A "#" and everything after it on the same line. The newline that ends the
  # comment is not included (`.` does not match a newline).
  rule comment
    "#" /.*/
  end

  # Identifier starting with a lowercase letter or underscore. The captured
  # name (without the trailing whitespace) is passed to Ast::NameExpression.
  rule name_expression
    (name:([a-z_] [a-zA-Z0-9_]*) space*) {
      Ast::NameExpression.new(capture(:name).to_str)
    }
  end

  # Identifier starting with an uppercase letter. The captured name (without
  # the trailing whitespace) is passed to Ast::ModuleName.
  rule module_name_expression
    (name:([A-Z] [a-zA-Z0-9_]*) space*) {
      Ast::ModuleName.new(capture(:name).to_str)
    }
  end

  # One or more decimal digits.
  rule digits
    [0-9] [0-9]*
  end

  # Unsigned decimal integer literal. The whole match (including trailing
  # whitespace) is converted with String#to_i, which ignores that whitespace.
  rule integer_expression
    (digits space*) {
      Ast::IntegerExpression.new(to_str.to_i)
    }
  end

  # Double-quoted string literal: everything up to the next '"' is the value;
  # no escape sequences are handled and no trailing whitespace is consumed.
  # Single-quoted strings are not matched; an earlier draft is kept for reference:
  #   "'" (/.*/ !"'") "'"
  rule string_expression
    ('"' str:(!'"' .)* '"') {
      Ast::StringExpression.new(capture(:str).to_str)
    }
  end

  rule basic_expression
    name_expression | integer_expression | module_name_expression | string_expression
  end

  # Keyword tokens. Each one consumes the whitespace that follows it.
  rule keyword_begin  'begin'  space* end
  rule keyword_class  'class'  space* end
  rule keyword_def    'def'    space* end
  rule keyword_do     'do'     space* end
  rule keyword_else   'else'   space* end
  rule keyword_end    'end'    space* end
  # `space*` was missing here only; added so `if` behaves like every other keyword token.
  rule keyword_if     'if'     space* end
  rule keyword_rescue 'rescue' space* end
  rule keyword_return 'return' space* end
  rule keyword_module 'module' space* end
  rule keyword_unless 'unless' space* end
  rule keyword_until  'until'  space* end
  rule keyword_while  'while'  space* end

  # Literal keywords that directly produce an AST node.
  rule keyword_nil
    ('nil' space*) { Ast::NilExpression.new }
  end

  rule keyword_false
    ('false' space*) { Ast::FalseExpression.new }
  end

  rule keyword_true
    ('true' space*) { Ast::TrueExpression.new }
  end

  # This rule exists only to make sure identifiers can't be keywords. It
  # duplicates the words of the keyword_* rules above (those need their own
  # trailing space handling), so every keyword added above must be added here
  # too -- 'class', 'module' and 'return' had been forgotten.
  # The `![a-zA-Z0-9_]` guard makes the rule match whole words only, so an
  # identifier that merely starts with a keyword (e.g. `define`, `class_name`)
  # is not mistaken for one. No keyword in the list is a prefix of another, so
  # the order of the alternatives does not matter.
  rule keyword
    ('begin' | 'class' | 'def' | 'do' | 'else' | 'end' | 'false' | 'if' | 'module' |
     'nil' | 'rescue' | 'return' | 'true' | 'unless' | 'until' | 'while') ![a-zA-Z0-9_] space*
  end

  # The literal keywords are tried before basic_expression, so `true`, `false`
  # and `nil` are not parsed as plain names.
  rule keyword_expression
    keyword_true | keyword_false | keyword_nil | basic_expression
  end

  # Tokens are single or double character combinations with "meaning":
  # braces, comma, colon, question mark, that kind of thing.
  # Operator symbols are defined separately, in Operators.
  rule left_parenthesis  '('  space* end
  rule right_parenthesis ')'  space* end
  rule left_brace        '{'  space* end
  rule right_brace       '}'  space* end
  rule left_bracket      '['  space* end
  rule right_bracket     ']'  space* end
  rule association       "=>" space* end
  rule comma             ','  space* end
  rule colon             ':'  space* end
  rule semicolon         ';'  space* end
  rule question_mark     '?'  space* end
  # The misspelt name is kept because other rules may reference it.
  rule excamation_mark   '!'  space* end
  # Correctly spelt alias for excamation_mark.
  rule exclamation_mark
    excamation_mark
  end
end