soml-parser/lib/parser/basic.citrus


grammar BasicTypes

  # Single-character classes built on the POSIX bracket expressions of
  # ruby regular expressions (see the ruby regexp documentation page).
  rule lower  /[[:lower:]]/ end    # Lowercase alphabetical character
  rule upper  /[[:upper:]]/ end    # Uppercase alphabetical
  rule alnum  /[[:alnum:]]/ end    # Alphabetic and numeric character
  rule alpha  /[[:alpha:]]/ end    # Alphabetic character
  rule blank  /[[:blank:]]/ end    # Space or tab
  rule space  /[[:space:]]/ end    # Whitespace character ([:blank:], newline, carriage return, etc.)
  rule digit  /[[:digit:]]/ end    # Digit
  rule graph  /[[:graph:]]/ end    # Non-blank character (excludes spaces, control characters, and similar)
  rule print  /[[:print:]]/ end    # Like [:graph:], but includes the space character
  rule xdigit /[[:xdigit:]]/ end   # Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F)

  #  Ruby is newline sensitive, so there is more whitespace footwork.
  #  Rule of thumb: every expression eats the space behind it, but only blanks, no newlines.
  #  A trailing comment is eaten by the deliminator too, again without the newline.
  rule deliminator  blank*  comment? end

  # A single whitespace character that is not a blank (newline, carriage return, etc.).
  # Defined through the character classes above rather than literals, for utf.
  rule linebreak (!blank space) end

  # "#" and the rest of the line; the newline that ends the comment is not included.
  rule comment
    "#" /.*/
  end

  # Identifier starting with a lower case ascii letter or underscore.
  # Only the labelled name (without the trailing deliminator) goes into the ast node.
  rule name_expression
     (name:([a-z_] [a-zA-Z0-9_]*) deliminator) { Ast::NameExpression.new(capture(:name).to_str)}
  end

  # Identifier starting with an upper case ascii letter.
  # Only the labelled name (without the trailing deliminator) goes into the ast node.
  rule module_name_expression
     (name:([A-Z] [a-zA-Z0-9_]*) deliminator) { Ast::ModuleName.new(capture(:name).to_str)}
  end

  # One or more ascii digits.
  rule digits
    [0-9]+
  end

  # Unsigned decimal integer. to_str covers the trailing deliminator as well,
  # which is harmless because String#to_i stops at the first non-digit.
  rule integer_expression
    (digits deliminator) { Ast::IntegerExpression.new(to_str.to_i) }
  end

  # Double quoted string. Everything up to the next '"' is captured as :str,
  # there is no escape handling. Like the other basic expressions, it eats the
  # blanks (and comment) behind it, so the :str capture is not affected.
  rule string_expression
    #"'" (/.*/ !"'") "'"
     ('"' str:(!'"' .)* '"' deliminator) {Ast::StringExpression.new(capture(:str).to_str) }
  end

  # Any of the basic expressions above; alternatives are tried in the order written.
  rule basic_expression
    name_expression | integer_expression  |
    module_name_expression | string_expression
  end


  # Tokens are single or double character combinations with "meaning":
  # braces, comma, point, question mark, quotes, that kind of thing.
  # Operator symbols are kept separately, in Operators.
  #
  # NOTE(review): every token below is followed by `space?`, which consumes at most
  # ONE whitespace character (newlines included, as space is [[:space:]]).
  # Confirm whether `space*` was intended before changing it.

  rule left_parenthesis
     '('  space?
  end

  rule right_parenthesis
    ')' space?
  end

  rule left_brace
    '{'  space?
  end

  rule right_brace
    '}'  space?
  end

  rule left_bracket
    '['  space?
  end

  rule right_bracket
    ']'  space?
  end

  rule association
    "=>"  space?
  end

  rule comma
    ','  space?
  end

  rule colon
    ':'  space?
  end

  rule semicolon
    ';'  space?
  end

  rule question_mark
    '?' space?
  end

  # The rule name is misspelled ("excamation"); it is kept as is, because
  # renaming it would break any grammar that refers to the rule by this name.
  rule excamation_mark
    '!' space?
  end

end