From ea19554b44d65538446ce7258db2fbd3c9f953c5 Mon Sep 17 00:00:00 2001
From: Torsten Ruger <torsten@villataika.fi>
Date: Wed, 4 Jun 2014 19:55:04 +0300
Subject: [PATCH] adds crystals parser code

---
 lib/parser/README.markdown        | 56 ++++++++++++++++++++
 lib/parser/basic_types.rb         | 48 +++++++++++++++++
 lib/parser/call_site.rb           | 17 ++++++
 lib/parser/compound_types.rb      | 19 +++++++
 lib/parser/control.rb             | 20 +++++++
 lib/parser/crystal.rb             | 37 +++++++++++++
 lib/parser/expression.rb          | 18 +++++++
 lib/parser/function_definition.rb | 17 ++++++
 lib/parser/keywords.rb            | 28 ++++++++++
 lib/parser/module_definition.rb   | 15 ++++++
 lib/parser/operators.rb           | 52 ++++++++++++++++++
 lib/parser/tokens.rb              | 22 ++++++++
 lib/parser/transform.rb           | 87 +++++++++++++++++++++++++++++++
 13 files changed, 436 insertions(+)
 create mode 100644 lib/parser/README.markdown
 create mode 100644 lib/parser/basic_types.rb
 create mode 100644 lib/parser/call_site.rb
 create mode 100644 lib/parser/compound_types.rb
 create mode 100644 lib/parser/control.rb
 create mode 100644 lib/parser/crystal.rb
 create mode 100644 lib/parser/expression.rb
 create mode 100644 lib/parser/function_definition.rb
 create mode 100644 lib/parser/keywords.rb
 create mode 100644 lib/parser/module_definition.rb
 create mode 100644 lib/parser/operators.rb
 create mode 100644 lib/parser/tokens.rb
 create mode 100644 lib/parser/transform.rb

diff --git a/lib/parser/README.markdown b/lib/parser/README.markdown
new file mode 100644
index 0000000..4c1c279
--- /dev/null
+++ b/lib/parser/README.markdown
@@ -0,0 +1,56 @@
+Parser
+================
+
+This includes the parser and generated ast.
+
+Parslet is really great in that it:
+- does not generate code but instead gives a clean DSL to define a grammar
+- uses Ruby modules so one can split the grammars up
+- has support for binary operators with precedence and binding
+- has a separate transform stage to generate an AST layer
+
+Especially the last point is great. Since it is separate it does not clutter up the actual grammar.
+And it can generate a layer that has no links to the actual parser anymore, thus saving/automating
+a complete transformation process.
+
+
+Operator list from http://stackoverflow.com/questions/21060234/ruby-operator-precedence-table
+
+N A M  Operator(s)            Description
+- - -  -----------            -----------
+1 R Y  ! ~ +                  boolean NOT, bitwise complement, unary plus
+                              (unary plus may be redefined from Ruby 1.9 with +@)
+
+2 R Y  **                     exponentiation
+1 R Y  -                      unary minus (redefine with -@)
+
+2 L Y  * / %                  multiplication, division, modulo (remainder)
+2 L Y  + -                    addition (or concatenation), subtraction
+
+2 L Y  << >>                  bitwise shift-left (or append), bitwise shift-right
+2 L Y  &                      bitwise AND
+
+2 L Y  | ^                    bitwise OR, bitwise XOR (exclusive OR)
+2 L Y  < <= >= >              ordering
+
+2 N Y  == === != =~ !~ <=>    equality, pattern matching, comparison
+                              (!= and !~ may not be redefined prior to Ruby 1.9)
+
+2 L N  &&                     boolean AND
+2 L N  ||                     boolean OR
+
+2 N N  .. ...                 range creation (inclusive and exclusive)
+                              and boolean flip-flops
+
+3 R N  ? :                    ternary if-then-else (conditional)
+2 L N  rescue                 exception-handling modifier
+
+2 R N  =                      assignment
+2 R N  **= *= /= %= += -=     assignment
+2 R N  <<= >>=                assignment
+2 R N  &&= &= ||= |= ^=       assignment
+
+1 N N  defined?               test variable definition and type
+1 R N  not                    boolean NOT (low precedence)
+2 L N  and or                 boolean AND, boolean OR (low precedence)
+2 N N  if unless while until  conditional and loop modifiers
\ No newline at end of file
diff --git a/lib/parser/basic_types.rb b/lib/parser/basic_types.rb
new file mode 100644
index 0000000..1b83dfb
--- /dev/null
+++ b/lib/parser/basic_types.rb
@@ -0,0 +1,48 @@
+module Parser
+  # Basic types are numbers and strings
+  module BasicTypes
+    include Parslet
+    # space really is just space. ruby is newline sensitive, so there is more whitespace footwork
+    # rule of thumb is that anything eats space behind it, but only space, no newlines
+    rule(:space)  { (str('\t') | str(' ')).repeat(1) }
+    rule(:space?) { space.maybe }
+    rule(:linebreak){ str("\n") >> space? >> linebreak.repeat }
+    
+    rule(:quote)      { str('"') }
+    rule(:nonquote)   { str('"').absent? >> any }
+
+    rule(:comment){ match('#') >> (linebreak.absent? >> any).repeat >> linebreak }
+    rule(:newline) { linebreak | comment }
+    rule(:eol) { newline  | any.absent? }
+    
+    rule(:double_quote){ str('"') }
+    rule(:minus) { str('-') }
+    rule(:plus) { str('+') }
+
+    rule(:sign) { plus | minus }
+    rule(:dot) {  str('.') }
+    rule(:digit) { match('[0-9]') }
+    rule(:exponent) { (str('e')| str('E')) }
+     
+    # identifier must start with lower case
+    # TODO rule forbit names like if_true, because it starts with a keyword. a little looser please!
+    rule(:name)   { keyword.absent? >> (match['a-z_'] >> match['a-zA-Z0-9_'].repeat).as(:name)  >> space? }
+    # instance variables must have the @
+    rule(:instance_variable) { (str('@') >> name).as(:instance_variable) }
+    # and class/module names must start with capital 
+    # (admittatly the rule matches constants too, but one step at a time)
+    rule(:module_name) { keyword.absent? >> (match['A-Z'] >> match['a-zA-Z0-9_'].repeat).as(:module_name)  >> space? }
+    
+    rule(:escape)     { str('\\') >> any.as(:esc) }
+    rule(:string)     { quote >> (
+        escape | 
+        nonquote.as(:char)
+      ).repeat(1).as(:string) >> quote }
+    
+    rule(:integer)    { sign.maybe >> digit.repeat(1).as(:integer) >> space? }
+    
+    rule(:float) { integer >>  dot >> integer >> 
+                            (exponent >> sign.maybe >> digit.repeat(1,3)).maybe >> space?}
+    rule(:basic_type){ integer | name | string | float | instance_variable | module_name }
+  end
+end
\ No newline at end of file
diff --git a/lib/parser/call_site.rb b/lib/parser/call_site.rb
new file mode 100644
index 0000000..5d0edac
--- /dev/null
+++ b/lib/parser/call_site.rb
@@ -0,0 +1,17 @@
+module Parser
+  module CallSite
+    include Parslet
+
+    rule(:argument_list) {
+      left_parenthesis >>
+      (  ((operator_expression|value_expression).as(:argument) >> space? >>
+          (comma >> space? >> (operator_expression|value_expression).as(:argument)).repeat(0)).repeat(0,1)).as(:argument_list) >>
+          space? >> right_parenthesis
+    }
+
+    rule(:call_site) { ((module_name|instance_variable|name).as(:receiver) >> str(".")).maybe >> #possibly qualified
+                          name.as(:call_site) >> argument_list >> comment.maybe}
+
+    
+  end
+end
diff --git a/lib/parser/compound_types.rb b/lib/parser/compound_types.rb
new file mode 100644
index 0000000..f5a6900
--- /dev/null
+++ b/lib/parser/compound_types.rb
@@ -0,0 +1,19 @@
+module Parser
+  # Compound types are Arrays and Hashes
+  module CompoundTypes
+    include Parslet
+
+    rule(:array_constant) do
+      left_bracket >>
+      (  ((operator_expression|value_expression).as(:array_element) >> space? >>
+          (comma >> space? >> (operator_expression|value_expression).as(:array_element)).repeat(0)).repeat(0,1)).as(:array_constant) >>
+          space? >> right_bracket
+      end
+
+    rule(:hash_pair)  { basic_type.as(:hash_key) >> association >> (operator_expression|value_expression).as(:hash_value) }
+    rule(:hash_constant)       { left_brace >> ((hash_pair.as(:hash_pair) >> 
+                         (comma >> space? >> hash_pair.as(:hash_pair)).repeat(0)).repeat(0,1)).as(:hash_constant)>> 
+                         space? >> right_brace }
+
+  end
+end
\ No newline at end of file
diff --git a/lib/parser/control.rb b/lib/parser/control.rb
new file mode 100644
index 0000000..2542438
--- /dev/null
+++ b/lib/parser/control.rb
@@ -0,0 +1,20 @@
+module Parser
+  module Control
+    include Parslet
+    rule(:conditional) do
+      keyword_if >> 
+      (( (value_expression|operator_expression).as(:conditional) ) |
+        left_parenthesis >> (operator_expression|value_expression).as(:conditional) >>  right_parenthesis) >>
+      newline >> expressions_else.as(:if_true) >> newline >> expressions_end.as(:if_false)
+      end
+    
+    rule(:while_do) do
+      keyword_while  >> left_parenthesis >> (operator_expression|value_expression).as(:while_cond)  >>
+                                          right_parenthesis >> keyword_do >> newline >>
+                              expressions_end.as(:body)
+    end
+    rule(:simple_return) do
+      keyword_return >> (operator_expression|value_expression).as(:return_expression)
+    end
+  end
+end
diff --git a/lib/parser/crystal.rb b/lib/parser/crystal.rb
new file mode 100644
index 0000000..bb52818
--- /dev/null
+++ b/lib/parser/crystal.rb
@@ -0,0 +1,37 @@
+require_relative "basic_types"
+require_relative "compound_types"
+require_relative "tokens"
+require_relative "keywords"
+require_relative "control"
+require_relative "expression"
+require_relative "call_site"
+require_relative "function_definition"
+require_relative "module_definition"
+require_relative "operators"
+
+module Parser
+  
+  # obviously a work in progress !!
+  # We "compose" the parser from bits, divide and hopefully conquer
+  # (each bit is a module of rules that is mixed into the Crystal class below)
+  # a note about .maybe : .maybe is in almost every respect the same as .repeat(0,1)
+  # so either 0, or 1, in other words maybe. Nice feature, but there are strings attached:
+  # a maybe reduces the 0-or-1 sequence (array) to a single item (hash). Thus 2 transformations are needed
+  # More work than the prettiness is worth, so only use .maybe on something that does not need capturing
+
+  class Crystal < Parslet::Parser
+    include BasicTypes
+    include CompoundTypes
+    include Tokens
+    include Keywords
+    include Control
+    include Expression
+    include CallSite
+    include FunctionDefinition
+    include Operators
+    include ModuleDef
+    # include order matters: when two modules define a rule of the same name, the one included later wins
+    rule(:root_body)    {(module_definition | class_definition | function_definition | expression  | call_site )}
+    rule(:root)         { root_body.repeat() }
+  end
+end
diff --git a/lib/parser/expression.rb b/lib/parser/expression.rb
new file mode 100644
index 0000000..77e315f
--- /dev/null
+++ b/lib/parser/expression.rb
@@ -0,0 +1,18 @@
+module Parser
+  module Expression
+    include Parslet
+    
+    rule(:value_expression) { call_site | basic_type }
+
+    rule(:expression) { (simple_return | while_do | conditional | operator_expression | call_site ) >> newline }
+
+    def delimited_expressions( delimit )
+      ( (delimit.absent? >> expression).repeat(1)).as(:expressions) >> delimit
+    end
+
+    rule(:expressions_do)     { delimited_expressions(keyword_do) }
+    rule(:expressions_else)   { delimited_expressions(keyword_else) }
+    rule(:expressions_end)    { delimited_expressions(keyword_end) }
+
+  end
+end
diff --git a/lib/parser/function_definition.rb b/lib/parser/function_definition.rb
new file mode 100644
index 0000000..1aa1815
--- /dev/null
+++ b/lib/parser/function_definition.rb
@@ -0,0 +1,17 @@
+module Parser
+  module FunctionDefinition
+    include Parslet
+    
+    rule(:function_definition) {
+      keyword_def >> ((module_name|instance_variable|name).as(:receiver) >> str(".")).maybe >> #possibly qualified
+                  name.as(:function_name) >> parmeter_list.maybe >> newline >> expressions_end >> newline
+    }
+
+    rule(:parmeter_list) {
+      left_parenthesis >>
+        ((name.as(:parmeter) >> (comma >> name.as(:parmeter)).repeat(0)).repeat(0,1)).as(:parmeter_list) >>
+      right_parenthesis
+    }
+
+  end
+end
diff --git a/lib/parser/keywords.rb b/lib/parser/keywords.rb
new file mode 100644
index 0000000..478e3f2
--- /dev/null
+++ b/lib/parser/keywords.rb
@@ -0,0 +1,28 @@
+module Parser
+  module Keywords
+    include Parslet
+    
+    rule(:keyword_begin)  {  str('begin').as(:begin) >> space?}
+    rule(:keyword_class)  {  str('class') >> space? }
+    rule(:keyword_def)    {  str('def') >> space? }
+    rule(:keyword_do)     {  str('do').as(:do) >> space?}
+    rule(:keyword_else)   {  str('else').as(:else) >> space? }
+    rule(:keyword_end)    {  str('end').as(:end) >> space? }  
+    rule(:keyword_false)  {  str('false').as(:false) >> space?}
+    rule(:keyword_if)     {  str('if').as(:if)   >> space? }
+    rule(:keyword_rescue) {  str('rescue').as(:rescue) >> space?}
+    rule(:keyword_return) {  str('return').as(:return) >> space?}
+    rule(:keyword_true)   {  str('true').as(:true) >> space?}
+    rule(:keyword_module) {  str('module') >> space? }
+    rule(:keyword_nil)    {  str('nil').as(:nil) >> space?}
+    rule(:keyword_unless) {  str('unless').as(:unless) >> space?}
+    rule(:keyword_until)  {  str('until').as(:until) >> space?}
+    rule(:keyword_while)  {  str('while').as(:while) >> space?}
+    
+    # this rule is just to make sure identifiers can't be keywords. Kind of duplication here, but we need the 
+    # space in above rules, so just make sure to add any here too.
+    rule(:keyword){ str('begin') | str('def') | str('do') | str('else') | str('end') | 
+                    str('false')| str('if')| str('rescue')| str('true')| str('nil') |
+                    str('unless')| str('until')| str('while')}
+  end
+end
\ No newline at end of file
diff --git a/lib/parser/module_definition.rb b/lib/parser/module_definition.rb
new file mode 100644
index 0000000..023685f
--- /dev/null
+++ b/lib/parser/module_definition.rb
@@ -0,0 +1,15 @@
+module Parser
+  module ModuleDef
+    include Parslet
+    rule(:module_definition) do
+      keyword_module >> module_name >> eol >>
+      ( (keyword_end.absent? >> root_body).repeat()).as(:module_expressions) >> keyword_end >> newline
+    end
+
+    rule(:class_definition) do
+      keyword_class >> module_name >> eol >>
+      ( (keyword_end.absent? >> root_body).repeat()).as(:class_expressions) >> keyword_end >> newline
+    end
+
+  end
+end
diff --git a/lib/parser/operators.rb b/lib/parser/operators.rb
new file mode 100644
index 0000000..dac03e6
--- /dev/null
+++ b/lib/parser/operators.rb
@@ -0,0 +1,52 @@
+module Parser
+  module Operators
+    include Parslet
+    rule(:exponent) { str('**') >> space?}
+    rule(:multiply) { match['*/%']  >> space? }
+    rule(:plus) { match['+-']  >> space? }
+    rule(:shift) { str(">>") | str("<<") >> space?}
+    rule(:bit_and) { str('&') >> space?}
+    rule(:bit_or) { str('|') >> space?}
+    rule(:greater_equal) { str('>=') >> space?}
+    rule(:less_or_equal) { str('<=') >> space?}
+    rule(:larger) { str('>') >> space?}
+    rule(:smaller) { str('<') >> space?}
+    rule(:identity) { str('===') >> space?}
+    rule(:equal) { str('==') >> space?}
+    rule(:not_equal) { str('!=') >> space?}
+    rule(:boolean_and) { str('&&') | str("and") >> space?}
+    rule(:boolean_or) { str('||') | str("or") >> space?}
+    rule(:assign) { str('=') >> space?}
+    rule(:op_assign) { str('+=')|str('-=')|str('*=')|str('/=')|str('%=') >> space?}
+    rule(:eclipse) { str('..') |str("...") >> space?}
+    rule(:assign) { str('=') >> space?}
+  
+    #infix doing the heavy lifting here, 
+    # is defined as an expressions and array of [atoms,priority,binding] triples
+    rule(:operator_expression) do infix_expression(value_expression,
+                                     [exponent, 120, :left] ,
+                                     [multiply, 120, :left] ,
+                                     [plus, 110, :left],
+                                     [shift, 100, :left],
+                                     [bit_and, 90, :left],
+                                     [bit_or, 90, :right],
+                                     [greater_equal, 80, :left],
+                                     [less_or_equal, 80, :left],
+                                     [larger, 80, :left],
+                                     [smaller, 80, :left],
+                                     [identity, 70, :right],
+                                     [equal, 70, :right],
+                                     [not_equal, 70, :right],
+                                     [boolean_and, 60, :left],
+                                     [boolean_or, 50, :right],
+                                     [eclipse, 40, :right],
+                                     [keyword_rescue, 30, :right], 
+                                     [assign, 20, :right],
+                                     [op_assign, 20, :right],
+                                     [keyword_until, 10, :right], 
+                                     [keyword_while, 10, :right], 
+                                     [keyword_unless, 10, :right], 
+                                     [keyword_if, 10, :right]) 
+                                   end
+  end
+end
diff --git a/lib/parser/tokens.rb b/lib/parser/tokens.rb
new file mode 100644
index 0000000..da9b5e0
--- /dev/null
+++ b/lib/parser/tokens.rb
@@ -0,0 +1,22 @@
+module Parser
+  # Tokens are single or double character combinations with "meaning"
+  # braces, comman, point, questionmark , quotes, that kind of thing
+  # operator symbols are seperate in Opreators
+  module Tokens
+    include Parslet
+    rule(:left_parenthesis) { str('(') >> space? }
+    rule(:right_parenthesis) { str(')') >> space? }
+    rule(:left_brace)  { str('{')    >> space? }
+    rule(:right_brace)  { str('}')    >> space? }
+    rule(:left_bracket)  { str('[')    >> space? }
+    rule(:right_bracket)  { str(']')    >> space? }
+
+    rule(:association)  { str("=>") >> space? }
+    rule(:comma)  { str(',') >> space? }
+    rule(:colon)  { str(':') >> space? }
+    rule(:semicolon)  { str(';') >> space? }
+    rule(:question_mark)  { str('?') >> space? }
+    rule(:excamation_mark)  { str('!') >> space? }  
+
+  end
+end
\ No newline at end of file
diff --git a/lib/parser/transform.rb b/lib/parser/transform.rb
new file mode 100644
index 0000000..cddbcef
--- /dev/null
+++ b/lib/parser/transform.rb
@@ -0,0 +1,87 @@
+require 'parslet'
+require 'ast/expression'
+
+module Parser
+  class Transform < Parslet::Transform
+    rule(:string => sequence(:chars)) { Ast::StringExpression.new chars.join }
+    rule(:esc => simple(:esc)) { '\\' +  esc }
+    rule(char: simple(:char)) { char }
+    
+    rule(:integer => simple(:value)) { Ast::IntegerExpression.new(value.to_i) }
+    rule(:name   => simple(:name))  { Ast::NameExpression.new(name.to_s) }
+    rule(:instance_variable   => simple(:instance_variable))  { Ast::VariableExpression.new(instance_variable.name) }
+    rule(:module_name   => simple(:module_name))  { Ast::ModuleName.new(module_name.to_s) }
+
+    rule(:array_constant => sequence(:array_constant) ) { Ast::ArrayExpression.new(array_constant) } 
+    rule(:array_element  => simple(:array_element))    { array_element  }
+    rule(:hash_constant => sequence(:hash_constant) ) { Ast::HashExpression.new(hash_constant) } 
+    rule(:hash_key => simple(:hash_key) , :hash_value => simple(:hash_value)) {  Ast::AssociationExpression.new(hash_key,hash_value) }
+    rule(:hash_pair => simple(:hash_pair) ) {  hash_pair }
+
+    rule(:argument  => simple(:argument))    { argument  }
+    rule(:argument_list => sequence(:argument_list)) { argument_list }
+
+    #Two rules for calls, simple and qualified. Keeps the rules simpler
+    rule( :call_site => simple(:call_site), 
+          :argument_list    => sequence(:argument_list)) do
+           Ast::CallSiteExpression.new(call_site.name, argument_list )
+    end
+    rule( :receiver => simple(:receiver) , :call_site => simple(:call_site), 
+          :argument_list    => sequence(:argument_list)) do
+           Ast::CallSiteExpression.new(call_site.name, argument_list , receiver) 
+    end
+
+    rule(:if => simple(:if), :conditional     => simple(:conditional),
+         :if_true  => {:expressions => sequence(:if_true) , :else => simple(:else) },
+         :if_false => {:expressions => sequence(:if_false) , :end => simple(:e) }) do
+           Ast::IfExpression.new(conditional, if_true, if_false) 
+         end
+
+    rule(:while     => simple(:while),
+         :while_cond => simple(:while_cond) , :do => simple(:do) , 
+         :body => {:expressions => sequence(:body) , :end => simple(:e) }) do
+           Ast::WhileExpression.new(while_cond, body) 
+         end
+
+    rule(:return => simple(:return) , :return_expression => simple(:return_expression))do
+       Ast::ReturnExpression.new(return_expression) 
+     end
+
+    rule(:parmeter  => simple(:parmeter))    { parmeter  }
+    rule(:parmeter_list => sequence(:parmeter_list)) { parmeter_list }
+
+    # Also two rules for function definitions, unqualified and qualified
+    rule(:function_name   => simple(:function_name),
+         :parmeter_list => sequence(:parmeter_list),
+         :expressions   => sequence(:expressions) , :end => simple(:e)) do
+            Ast::FunctionExpression.new(function_name.name, parmeter_list, expressions)
+          end
+
+    rule(:receiver=> simple(:receiver),
+         :function_name   => simple(:function_name),
+         :parmeter_list => sequence(:parmeter_list),
+         :expressions   => sequence(:expressions) , :end => simple(:e)) do
+            Ast::FunctionExpression.new(function_name.name, parmeter_list, expressions , receiver)
+          end
+
+    rule(l: simple(:l), o: simple(:o) , r: simple(:r)) do 
+      Ast::OperatorExpression.new( o.to_s.strip , l ,r)
+    end
+    
+    #modules and classes are understandibly quite similar   Class < Module
+    rule( :module_name => simple(:module_name) , :module_expressions => sequence(:module_expressions) , :end=>"end") do
+      Ast::ModuleExpression.new(module_name , module_expressions)
+    end
+    rule( :module_name => simple(:module_name) , :class_expressions => sequence(:class_expressions) , :end=>"end") do
+      Ast::ClassExpression.new(module_name , class_expressions)
+    end
+    
+    #shortcut to get the ast tree for a given string
+    # optional second arguement specifies a rule that will be parsed (mainly for testing)     
+    def self.ast string , rule = :root
+      syntax    = Parser.new.send(rule).parse(string)
+      tree      = Transform.new.apply(syntax)
+      tree
+    end
+  end
+end