soml-parser/lib/parser/basic.citrus


grammar BasicTypes
  # Lexical building blocks: character classes, comments, literals
  # (names, module names, integers, strings), keywords and punctuation tokens.
  # Most token-like rules also consume any whitespace that follows them, so
  # rules composed from them do not have to skip whitespace themselves.

  # Single-character classes, written as Ruby POSIX bracket expressions
  # (unicode generalized categories, according to the Ruby regex documentation).
  rule lower  /[[:lower:]]/ end    # Lowercase alphabetical character
  rule upper  /[[:upper:]]/ end    # Uppercase alphabetical
  rule alnum  /[[:alnum:]]/ end    # Alphabetic and numeric character
  rule alpha  /[[:alpha:]]/ end    # Alphabetic character
  rule blank  /[[:blank:]]/ end    # Space or tab
  rule space  /[[:space:]]/ end    # Whitespace character ([:blank:], newline, carriage return, etc.)
  rule digit  /[[:digit:]]/ end    # Digit
  rule graph  /[[:graph:]]/ end    # Non-blank character (excludes spaces, control characters, and similar)
  rule print  /[[:print:]]/ end    # Like [:graph:], but includes the space character
  rule xdigit /[[:xdigit:]]/ end   # Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)

  # One whitespace character that is not a space or tab (newline, carriage
  # return, ...). Defined through the character classes above rather than a
  # literal "\n" so that it is expressed in regex terms (for utf input).
  rule linebreak (!blank space) end

  # A "#" followed by the rest of the line. The regex "." does not match a
  # newline, so the newline that ends the comment is not part of the match.
  rule comment
    "#" /.*/
  end

  # Identifier starting with a lowercase ASCII letter or underscore, followed by
  # ASCII letters, digits or underscores. Trailing whitespace is consumed but
  # only the labelled name is passed to Ast::NameExpression.
  rule name_expression
     (name:([a-z_] [a-zA-Z0-9_]*) space*) { Ast::NameExpression.new(capture(:name).to_str)}
  end

  # Same as name_expression, but the first character must be an uppercase ASCII
  # letter; builds an Ast::ModuleName from the labelled name.
  rule module_name_expression
     (name:([A-Z] [a-zA-Z0-9_]*) space*) { Ast::ModuleName.new(capture(:name).to_str)}
  end

  # One or more ASCII decimal digits.
  rule digits
    [0-9]+
  end

  # Unsigned decimal integer plus trailing whitespace. The whole matched text is
  # converted with to_i, which ignores the whitespace after the digits.
  rule integer_expression
    (digits space*) { Ast::IntegerExpression.new(to_str.to_i) }
  end

  # Double-quoted string. The content is every character up to (not including)
  # the next double quote; there is no escape mechanism, so a string cannot
  # contain a double quote. Trailing whitespace is not consumed here.
  rule string_expression
     ('"' str:(!'"' .)* '"') {Ast::StringExpression.new(capture(:str).to_str) }
  end

  # Any of the literal / name expressions above. Alternatives are tried in the
  # order written.
  rule basic_expression
    name_expression | integer_expression  |
    module_name_expression | string_expression
  end

  # Structural keywords. Each consumes the whitespace following it, with the
  # exception of keyword_if.
  rule keyword_begin    'begin'  space* end
  rule keyword_class    'class'  space*  end
  rule keyword_def      'def'    space*  end
  rule keyword_do       'do'     space* end
  rule keyword_else     'else'   space*  end
  rule keyword_end      'end'    space*  end
  # NOTE(review): unlike its siblings this does not consume trailing whitespace.
  # Presumably deliberate (something is expected to follow 'if' directly), so it
  # is left as is -- confirm against the rules that use keyword_if.
  rule keyword_if       'if'       end
  rule keyword_rescue   'rescue' space* end
  rule keyword_return   'return' space* end
  rule keyword_module   'module' space*  end
  rule keyword_unless   'unless' space* end
  rule keyword_until    'until'  space* end
  rule keyword_while    'while'  space* end

  # Keywords that are values: each consumes trailing whitespace and produces the
  # corresponding Ast node.
  rule keyword_nil
    ('nil'    space* ){ Ast::NilExpression.new }
  end

  rule keyword_false
    ('false'  space*) { Ast::FalseExpression.new }
  end

  rule keyword_true
    ('true'   space*) { Ast::TrueExpression.new }
  end

  # This rule exists only so that other rules can make sure identifiers are not
  # keywords. It duplicates the words of the keyword_* rules above (those need
  # their own trailing space handling), so every keyword added above must be
  # added here too. 'class', 'module' and 'return' had been left out and are now
  # included.
  # There is no word-boundary check: the rule also matches when a keyword is
  # merely the prefix of a longer word.
  rule keyword   ('begin'  |  'class' |  'def'  |  'do'  |  'else'  |  'end'  |
                   'false' |  'if' |  'module' |  'rescue' |  'return' |
                   'true' |  'nil'  |  'unless' |  'until' |  'while') space*
  end

  # The value keywords are tried before basic_expression so that true / false /
  # nil are not parsed as plain names.
  rule keyword_expression
    keyword_true | keyword_false | keyword_nil | basic_expression
  end


  # Tokens are single or double character combinations with "meaning":
  # braces, comma, colon, question mark, that kind of thing.
  # Operator symbols are defined separately, with the operators.
  # Every token consumes the whitespace that follows it.

  rule left_parenthesis
     '('  space*
  end

  rule right_parenthesis
    ')' space*
  end

  rule left_brace
    '{'  space*
  end

  rule right_brace
    '}'  space*
  end

  rule left_bracket
    '['  space*
  end

  rule right_bracket
    ']'  space*
  end

  rule association
    "=>"  space*
  end

  rule comma
    ','  space*
  end

  rule colon
    ':'  space*
  end

  rule semicolon
    ';'  space*
  end

  rule question_mark
    '?' space*
  end

  # The rule name is misspelled; it is kept unchanged because other grammars may
  # reference it by this name.
  rule excamation_mark
    '!' space*
  end

  # Correctly spelled alias for excamation_mark.
  rule exclamation_mark
    excamation_mark
  end

end