adds crystals parser code

This commit is contained in:
Torsten Ruger 2014-06-04 19:55:04 +03:00
parent e36fa0eb5d
commit ea19554b44
13 changed files with 436 additions and 0 deletions

View File

@ -0,0 +1,56 @@
Parser
================
This includes the parser and generated ast.
Parslet is really great in that it:
- does not generate code but instead gives a clean dsl to define a grammar
- uses ruby modules so one can split the grammars up
- has support for binary operators with precedence and binding
- has a separate transform stage to generate an ast layer
Especially the last point is great. Since it is separate it does not clutter up the actual grammar.
And it can generate a layer that has no links to the actual parser anymore, thus saving/automating
a complete transformation process.
Operator list from http://stackoverflow.com/questions/21060234/ruby-operator-precedence-table
N A M Operator(s) Description
- - - ----------- -----------
1 R Y ! ~ + boolean NOT, bitwise complement, unary plus
(unary plus may be redefined from Ruby 1.9 with +@)
2 R Y ** exponentiation
1 R Y - unary minus (redefine with -@)
2 L Y * / % multiplication, division, modulo (remainder)
2 L Y + - addition (or concatenation), subtraction
2 L Y << >> bitwise shift-left (or append), bitwise shift-right
2 L Y & bitwise AND
2 L Y | ^ bitwise OR, bitwise XOR (exclusive OR)
2 L Y < <= >= > ordering
2 N Y == === != =~ !~ <=> equality, pattern matching, comparison
(!= and !~ may not be redefined prior to Ruby 1.9)
2 L N && boolean AND
2 L N || boolean OR
2 N N .. ... range creation (inclusive and exclusive)
and boolean flip-flops
3 R N ? : ternary if-then-else (conditional)
2 L N rescue exception-handling modifier
2 R N = assignment
2 R N **= *= /= %= += -= assignment
2 R N <<= >>= assignment
2 R N &&= &= ||= |= ^= assignment
1 N N defined? test variable definition and type
1 R N not boolean NOT (low precedence)
2 L N and or boolean AND, boolean OR (low precedence)
2 N N if unless while until conditional and loop modifiers

48
lib/parser/basic_types.rb Normal file
View File

@ -0,0 +1,48 @@
module Parser
  # Basic types are numbers, strings and names (plain names, instance variables
  # and module names).
  #
  # The name rules refer to +keyword+, which is not defined in this module, so
  # BasicTypes has to be mixed into a parser that provides that rule as well.
  module BasicTypes
    include Parslet

    # space really is just space. ruby is newline sensitive, so there is more whitespace footwork
    # rule of thumb is that anything eats space behind it, but only space, no newlines
    # (the tab needs double quotes: '\t' would be a literal backslash followed by a "t")
    rule(:space) { (str("\t") | str(' ')).repeat(1) }
    rule(:space?) { space.maybe }
    # a newline with optional trailing space, followed by any number of further linebreaks
    rule(:linebreak) { str("\n") >> space? >> linebreak.repeat }
    rule(:quote) { str('"') }
    rule(:nonquote) { str('"').absent? >> any }
    # a comment runs from the # up to and including the linebreak that ends it
    rule(:comment) { match('#') >> (linebreak.absent? >> any).repeat >> linebreak }
    rule(:newline) { linebreak | comment }
    # end of line is a newline (or comment) or the end of the input
    rule(:eol) { newline | any.absent? }
    rule(:double_quote) { str('"') }
    rule(:minus) { str('-') }
    rule(:plus) { str('+') }
    # The sign characters are spelled out instead of using plus | minus: Operators
    # defines its own +plus+ rule (either sign, followed by optional space), and a
    # parser that includes both modules would otherwise pick that one up here.
    rule(:sign) { str('+') | str('-') }
    rule(:dot) { str('.') }
    rule(:digit) { match('[0-9]') }
    rule(:exponent) { (str('e') | str('E')) }

    # identifier must start with lower case
    # TODO rule forbids names like if_true, because it starts with a keyword. a little looser please!
    rule(:name) { keyword.absent? >> (match['a-z_'] >> match['a-zA-Z0-9_'].repeat).as(:name) >> space? }
    # instance variables must have the @
    rule(:instance_variable) { (str('@') >> name).as(:instance_variable) }
    # and class/module names must start with capital
    # (admittedly the rule matches constants too, but one step at a time)
    rule(:module_name) { keyword.absent? >> (match['A-Z'] >> match['a-zA-Z0-9_'].repeat).as(:module_name) >> space? }

    # a backslash followed by any one character, which is captured as :esc
    rule(:escape) { str('\\') >> any.as(:esc) }
    # double quoted string of at least one (possibly escaped) character
    rule(:string) do
      quote >> (
        escape |
        nonquote.as(:char)
      ).repeat(1).as(:string) >> quote
    end

    # The sign is part of the capture, otherwise "-5" would be captured as "5".
    rule(:integer) { (sign.maybe >> digit.repeat(1)).as(:integer) >> space? }
    # The exponent marker is matched inline for the same reason as in +sign+:
    # Operators also has a rule called +exponent+ (the ** operator).
    # NOTE(review): basic_type tries integer before float, so a float is never
    # reached from there, and float has no capture of its own yet.
    rule(:float) do
      integer >> dot >> integer >>
        (match['eE'] >> sign.maybe >> digit.repeat(1, 3)).maybe >> space?
    end
    rule(:basic_type) { integer | name | string | float | instance_variable | module_name }
  end
end

17
lib/parser/call_site.rb Normal file
View File

@ -0,0 +1,17 @@
module Parser
  # Grammar for calls: an optional receiver, the name of the called function
  # and a parenthesised list of arguments.
  module CallSite
    include Parslet

    # Parenthesised, comma separated arguments. Every argument is captured as
    # :argument and the whole list as :argument_list. The list uses
    # repeat(0,1) rather than maybe, so the capture is always a sequence.
    rule(:argument_list) do
      argument = (operator_expression | value_expression).as(:argument)
      further_arguments = (comma >> space? >> argument).repeat(0)
      left_parenthesis >>
        (argument >> space? >> further_arguments).repeat(0, 1).as(:argument_list) >>
        space? >> right_parenthesis
    end

    # A call, optionally qualified with "receiver." in front of the name,
    # and optionally followed by a comment.
    rule(:call_site) do
      receiver = (module_name | instance_variable | name).as(:receiver)
      qualifier = (receiver >> str('.')).maybe
      qualifier >> name.as(:call_site) >> argument_list >> comment.maybe
    end
  end
end

View File

@ -0,0 +1,19 @@
module Parser
  # Compound types are Arrays and Hashes
  module CompoundTypes
    include Parslet

    # [ element, element, ... ] where each element is captured as
    # :array_element and the (possibly empty) list as :array_constant.
    rule(:array_constant) do
      element = (operator_expression | value_expression).as(:array_element)
      further_elements = (comma >> space? >> element).repeat(0)
      left_bracket >>
        (element >> space? >> further_elements).repeat(0, 1).as(:array_constant) >>
        space? >> right_bracket
    end

    # key => value, the key being a basic type and the value any expression
    rule(:hash_pair) do
      key = basic_type.as(:hash_key)
      value = (operator_expression | value_expression).as(:hash_value)
      key >> association >> value
    end

    # { pair, pair, ... } where each pair is captured as :hash_pair and the
    # (possibly empty) list as :hash_constant.
    rule(:hash_constant) do
      pair = hash_pair.as(:hash_pair)
      pairs = (pair >> (comma >> space? >> pair).repeat(0)).repeat(0, 1)
      left_brace >> pairs.as(:hash_constant) >> space? >> right_brace
    end
  end
end

20
lib/parser/control.rb Normal file
View File

@ -0,0 +1,20 @@
module Parser
  # Control structures: if/else/end, while ... do ... end and return.
  module Control
    include Parslet

    # if <condition> newline <expressions> else newline <expressions> end
    #
    # The condition may be written with or without parentheses. As everywhere
    # else in the grammar, operator_expression is tried before value_expression:
    # in the opposite order the value alternative matches the first operand of
    # "a > b" and the rule then fails on the operator, so unparenthesised
    # operator conditions could not be parsed.
    rule(:conditional) do
      condition = (operator_expression | value_expression).as(:conditional)
      keyword_if >>
        (condition | (left_parenthesis >> condition >> right_parenthesis)) >>
        newline >> expressions_else.as(:if_true) >> newline >> expressions_end.as(:if_false)
    end

    # while (<condition>) do newline <expressions> end
    # The parentheses around the condition are mandatory in this rule.
    rule(:while_do) do
      keyword_while >> left_parenthesis >> (operator_expression | value_expression).as(:while_cond) >>
        right_parenthesis >> keyword_do >> newline >>
        expressions_end.as(:body)
    end

    # return <expression>
    rule(:simple_return) do
      keyword_return >> (operator_expression | value_expression).as(:return_expression)
    end
  end
end

37
lib/parser/crystal.rb Normal file
View File

@ -0,0 +1,37 @@
require_relative "basic_types"
require_relative "compound_types"
require_relative "tokens"
require_relative "keywords"
require_relative "control"
require_relative "expression"
require_relative "call_site"
require_relative "function_definition"
require_relative "module_definition"
require_relative "operators"
module Parser
  # obviously a work in progress !!
  # We "compose" the parser from bits, divide and hopefully conquer
  #
  # A note about .maybe: in almost every respect .maybe is the same as .repeat(0,1),
  # so either 0 or 1, in other words maybe. Nice feature, but there are strings attached:
  # with .maybe the capture is a single (hash) instead of a sequence (array), so
  # 2 transformations are needed. More work than the prettiness is worth, so only
  # use .maybe on something that does not need capturing.
  class Crystal < Parslet::Parser
    # The grammar modules are mixed in one after the other, in exactly this order.
    [
      BasicTypes, CompoundTypes, Tokens, Keywords, Control,
      Expression, CallSite, FunctionDefinition, Operators, ModuleDef
    ].each { |grammar| include grammar }

    # anything that may appear at the top level (or inside a module/class body)
    rule(:root_body) do
      module_definition | class_definition | function_definition | expression | call_site
    end
    # a program is any number of top level items
    rule(:root) { root_body.repeat }
  end
end

18
lib/parser/expression.rb Normal file
View File

@ -0,0 +1,18 @@
module Parser
  # Expressions and delimited lists of expressions (the bodies of control
  # structures and functions).
  module Expression
    include Parslet

    # something that yields a value without involving an operator
    rule(:value_expression) { call_site | basic_type }

    # one statement, always terminated by a newline
    rule(:expression) do
      statement = simple_return | while_do | conditional | operator_expression | call_site
      statement >> newline
    end

    # Builds a parser for one or more expressions that run up to the given
    # delimiter. The expressions are captured as :expressions and the delimiter
    # itself is consumed as well.
    #
    # delimit - the parslet atom that ends the list (a keyword rule here)
    def delimited_expressions(delimit)
      body = (delimit.absent? >> expression).repeat(1)
      body.as(:expressions) >> delimit
    end

    rule(:expressions_do) { delimited_expressions(keyword_do) }
    rule(:expressions_else) { delimited_expressions(keyword_else) }
    rule(:expressions_end) { delimited_expressions(keyword_end) }
  end
end

View File

@ -0,0 +1,17 @@
module Parser
  # Grammar for function definitions: def, an optional receiver, the name,
  # an optional parameter list and the body up to end.
  module FunctionDefinition
    include Parslet

    # def [receiver.]name[(parameters)] newline <expressions> end newline
    rule(:function_definition) do
      receiver = (module_name | instance_variable | name).as(:receiver)
      qualifier = (receiver >> str('.')).maybe # possibly qualified
      keyword_def >> qualifier >>
        name.as(:function_name) >> parmeter_list.maybe >> newline >> expressions_end >> newline
    end

    # Parenthesised, comma separated names. Each name is captured as :parmeter
    # and the (possibly empty) list as :parmeter_list.
    rule(:parmeter_list) do
      parameter = name.as(:parmeter)
      parameters = (parameter >> (comma >> parameter).repeat(0)).repeat(0, 1)
      left_parenthesis >> parameters.as(:parmeter_list) >> right_parenthesis
    end
  end
end

28
lib/parser/keywords.rb Normal file
View File

@ -0,0 +1,28 @@
module Parser
  # The reserved words of the language. Each keyword_xxx rule matches the word
  # and eats the space behind it; most of them also capture the word under
  # its own name.
  module Keywords
    include Parslet

    rule(:keyword_begin) { str('begin').as(:begin) >> space? }
    rule(:keyword_class) { str('class') >> space? }
    rule(:keyword_def) { str('def') >> space? }
    rule(:keyword_do) { str('do').as(:do) >> space? }
    rule(:keyword_else) { str('else').as(:else) >> space? }
    rule(:keyword_end) { str('end').as(:end) >> space? }
    rule(:keyword_false) { str('false').as(:false) >> space? }
    rule(:keyword_if) { str('if').as(:if) >> space? }
    rule(:keyword_rescue) { str('rescue').as(:rescue) >> space? }
    rule(:keyword_return) { str('return').as(:return) >> space? }
    rule(:keyword_true) { str('true').as(:true) >> space? }
    rule(:keyword_module) { str('module') >> space? }
    rule(:keyword_nil) { str('nil').as(:nil) >> space? }
    rule(:keyword_unless) { str('unless').as(:unless) >> space? }
    rule(:keyword_until) { str('until').as(:until) >> space? }
    rule(:keyword_while) { str('while').as(:while) >> space? }

    # this rule is just to make sure identifiers can't be keywords. Kind of duplication here, but we need the
    # space in above rules, so just make sure to add any here too.
    #
    # Every keyword_xxx rule above has its word listed here (class, module and
    # return used to be missing). The trailing negative lookahead makes the rule
    # match whole words only, so an identifier that merely starts with a keyword
    # (if_true, done, end_index) is not mistaken for one. No keyword is a prefix
    # of another, so the order of the alternatives does not matter.
    rule(:keyword) do
      (str('begin') | str('class') | str('def') | str('do') | str('else') | str('end') |
       str('false') | str('if') | str('module') | str('nil') | str('rescue') | str('return') |
       str('true') | str('unless') | str('until') | str('while')) >>
        match['a-zA-Z0-9_'].absent?
    end
  end
end

View File

@ -0,0 +1,15 @@
module Parser
  # Grammar for module and class definitions. Both have the same shape and
  # differ only in the leading keyword and the name of the captured body.
  module ModuleDef
    include Parslet

    # module Name eol <root_body items> end newline
    rule(:module_definition) do
      body = (keyword_end.absent? >> root_body).repeat
      keyword_module >> module_name >> eol >>
        body.as(:module_expressions) >> keyword_end >> newline
    end

    # class Name eol <root_body items> end newline
    rule(:class_definition) do
      body = (keyword_end.absent? >> root_body).repeat
      keyword_class >> module_name >> eol >>
        body.as(:class_expressions) >> keyword_end >> newline
    end
  end
end

52
lib/parser/operators.rb Normal file
View File

@ -0,0 +1,52 @@
module Parser
  # The binary operators and the infix expression that is built from them.
  #
  # Every operator rule eats the space behind the operator. In ruby >> binds
  # tighter than |, so rules with alternatives need parentheses around the
  # alternatives; without them only the last alternative gets the space?.
  #
  # NOTE(review): +plus+ and +exponent+ are also the names of rules in
  # BasicTypes (with a different meaning). In a parser that includes both
  # modules only one definition of each name is visible.
  module Operators
    include Parslet

    rule(:exponent) { str('**') >> space? }
    rule(:multiply) { match['*/%'] >> space? }
    rule(:plus) { match['+-'] >> space? }
    rule(:shift) { (str('>>') | str('<<')) >> space? }
    rule(:bit_and) { str('&') >> space? }
    rule(:bit_or) { str('|') >> space? }
    rule(:greater_equal) { str('>=') >> space? }
    rule(:less_or_equal) { str('<=') >> space? }
    rule(:larger) { str('>') >> space? }
    rule(:smaller) { str('<') >> space? }
    rule(:identity) { str('===') >> space? }
    rule(:equal) { str('==') >> space? }
    rule(:not_equal) { str('!=') >> space? }
    rule(:boolean_and) { (str('&&') | str('and')) >> space? }
    rule(:boolean_or) { (str('||') | str('or')) >> space? }
    rule(:assign) { str('=') >> space? }
    rule(:op_assign) { (str('+=') | str('-=') | str('*=') | str('/=') | str('%=')) >> space? }
    # "..." must be tried first: ".." is a prefix of it and would always win
    rule(:eclipse) { (str('...') | str('..')) >> space? }

    # infix doing the heavy lifting here,
    # is defined as an expression and an array of [atom, priority, binding] triples
    #
    # The priority is the number in each triple; the position in the list only
    # decides which operator is tried first. That is why an operator that starts
    # with another operator is listed before it: += before +, && before &,
    # || before |, >= before >, === before == before =.
    # NOTE(review): exponent has the same priority and binding as multiply here,
    # while ruby's ** binds tighter and to the right - confirm what is intended.
    rule(:operator_expression) do
      infix_expression(value_expression,
                       [op_assign, 20, :right],
                       [exponent, 120, :left],
                       [multiply, 120, :left],
                       [plus, 110, :left],
                       [shift, 100, :left],
                       [boolean_and, 60, :left],
                       [boolean_or, 50, :right],
                       [bit_and, 90, :left],
                       [bit_or, 90, :right],
                       [greater_equal, 80, :left],
                       [less_or_equal, 80, :left],
                       [larger, 80, :left],
                       [smaller, 80, :left],
                       [identity, 70, :right],
                       [equal, 70, :right],
                       [not_equal, 70, :right],
                       [eclipse, 40, :right],
                       [keyword_rescue, 30, :right],
                       [assign, 20, :right],
                       [keyword_until, 10, :right],
                       [keyword_while, 10, :right],
                       [keyword_unless, 10, :right],
                       [keyword_if, 10, :right])
    end
  end
end

22
lib/parser/tokens.rb Normal file
View File

@ -0,0 +1,22 @@
module Parser
  # Tokens are single or double character combinations with "meaning"
  # braces, comma, point, question mark, quotes, that kind of thing
  # operator symbols are separate in Operators
  module Tokens
    include Parslet

    # rule name => the text it matches. Every token eats the space behind it.
    {
      left_parenthesis: '(',
      right_parenthesis: ')',
      left_brace: '{',
      right_brace: '}',
      left_bracket: '[',
      right_bracket: ']',
      association: '=>',
      comma: ',',
      colon: ':',
      semicolon: ';',
      question_mark: '?',
      excamation_mark: '!'
    }.each do |token_name, text|
      rule(token_name) { str(text) >> space? }
    end
  end
end

87
lib/parser/transform.rb Normal file
View File

@ -0,0 +1,87 @@
require 'parslet'
require 'ast/expression'
module Parser
  # Turns the tree of hashes, arrays and strings that the parser produces into
  # Ast objects. Each rule matches one shape of (sub)tree; the names bound in
  # the pattern are available inside the rule's block.
  class Transform < Parslet::Transform
    rule(:string => sequence(:chars)) { Ast::StringExpression.new chars.join }
    # an escaped character keeps its backslash
    rule(:esc => simple(:esc)) { '\\' + esc }
    rule(char: simple(:char)) { char }
    rule(:integer => simple(:value)) { Ast::IntegerExpression.new(value.to_i) }
    rule(:name => simple(:name)) { Ast::NameExpression.new(name.to_s) }
    rule(:instance_variable => simple(:instance_variable)) { Ast::VariableExpression.new(instance_variable.name) }
    rule(:module_name => simple(:module_name)) { Ast::ModuleName.new(module_name.to_s) }

    rule(:array_constant => sequence(:array_constant)) { Ast::ArrayExpression.new(array_constant) }
    rule(:array_element => simple(:array_element)) { array_element }
    rule(:hash_constant => sequence(:hash_constant)) { Ast::HashExpression.new(hash_constant) }
    rule(:hash_key => simple(:hash_key), :hash_value => simple(:hash_value)) { Ast::AssociationExpression.new(hash_key, hash_value) }
    rule(:hash_pair => simple(:hash_pair)) { hash_pair }

    rule(:argument => simple(:argument)) { argument }
    rule(:argument_list => sequence(:argument_list)) { argument_list }

    # Two rules for calls, simple and qualified. Keeps the rules simpler
    rule(:call_site => simple(:call_site),
         :argument_list => sequence(:argument_list)) do
      Ast::CallSiteExpression.new(call_site.name, argument_list)
    end

    rule(:receiver => simple(:receiver), :call_site => simple(:call_site),
         :argument_list => sequence(:argument_list)) do
      Ast::CallSiteExpression.new(call_site.name, argument_list, receiver)
    end

    rule(:if => simple(:if), :conditional => simple(:conditional),
         :if_true => { :expressions => sequence(:if_true), :else => simple(:else) },
         :if_false => { :expressions => sequence(:if_false), :end => simple(:e) }) do
      Ast::IfExpression.new(conditional, if_true, if_false)
    end

    rule(:while => simple(:while),
         :while_cond => simple(:while_cond), :do => simple(:do),
         :body => { :expressions => sequence(:body), :end => simple(:e) }) do
      Ast::WhileExpression.new(while_cond, body)
    end

    rule(:return => simple(:return), :return_expression => simple(:return_expression)) do
      Ast::ReturnExpression.new(return_expression)
    end

    rule(:parmeter => simple(:parmeter)) { parmeter }
    rule(:parmeter_list => sequence(:parmeter_list)) { parmeter_list }

    # Also two rules for function definitions, unqualified and qualified
    rule(:function_name => simple(:function_name),
         :parmeter_list => sequence(:parmeter_list),
         :expressions => sequence(:expressions), :end => simple(:e)) do
      Ast::FunctionExpression.new(function_name.name, parmeter_list, expressions)
    end

    rule(:receiver => simple(:receiver),
         :function_name => simple(:function_name),
         :parmeter_list => sequence(:parmeter_list),
         :expressions => sequence(:expressions), :end => simple(:e)) do
      Ast::FunctionExpression.new(function_name.name, parmeter_list, expressions, receiver)
    end

    # the infix expression delivers left operand, operator and right operand as l, o and r
    rule(l: simple(:l), o: simple(:o), r: simple(:r)) do
      Ast::OperatorExpression.new(o.to_s.strip, l, r)
    end

    # modules and classes are understandably quite similar Class < Module
    rule(:module_name => simple(:module_name), :module_expressions => sequence(:module_expressions), :end => "end") do
      Ast::ModuleExpression.new(module_name, module_expressions)
    end

    rule(:module_name => simple(:module_name), :class_expressions => sequence(:class_expressions), :end => "end") do
      Ast::ClassExpression.new(module_name, class_expressions)
    end

    # Shortcut to get the ast tree for a given string.
    #
    # string - the source text to parse
    # rule   - optional name of the parser rule to start from (mainly for
    #          testing), defaults to :root
    #
    # Returns whatever the transformation makes of the parsed syntax tree.
    # A parse failure is raised by the parser's parse call and not handled here.
    #
    # The parser class is Crystal. Inside this namespace the constant Parser is
    # the enclosing module itself, which cannot be instantiated. Crystal has to
    # be loaded by the time this method is called.
    def self.ast(string, rule = :root)
      syntax = Crystal.new.send(rule).parse(string)
      Transform.new.apply(syntax)
    end
  end
end