vendored parslet, deemed stable enough and better without dependency

2014-04-27 15:34:35 +03:00
parent 6fafeda66d
commit b1203363d4
42 changed files with 3415 additions and 2 deletions
--- a/lib/crystal.rb
+++ b/lib/crystal.rb
@@ -1,5 +1,3 @@
 # parslet is assumed to be checked out at the same level as crystal for now
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', ".." , "parslet",'lib'))
 require 'parslet'
 require "asm/program"
--- a/lib/parslet.rb
+++ b/lib/parslet.rb
@@ -0,0 +1,302 @@
 # A simple parser generator library. Typical usage would look like this: 
 #
 #   require 'parslet'
 #        
 #   class MyParser < Parslet::Parser
 #     rule(:a) { str('a').repeat }
 #     root(:a)        
 #   end
 #        
 #   pp MyParser.new.parse('aaaa')   # => 'aaaa'@0
 #   pp MyParser.new.parse('bbbb')   # => Parslet::ParseFailed: 
 #                                   #    Don't know what to do with bbbb at line 1 char 1.
 #
 # The simple DSL allows you to define grammars in PEG-style. This kind of
 # grammar construction does away with the ambiguities that usually come with
 # parsers; instead, it allows you to construct grammars that are easier to
 # debug, since less magic is involved. 
 #
 # Parslet is typically used in stages: 
 #
 # 
 # * Parsing the input string; this yields an intermediary tree, see
 #   Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
 #   Parslet::ClassMethods#root.
 # * Transformation of the tree into something useful to you, see
 #   Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
 #
 # The first stage is traditionally intermingled with the second stage; output
 # from the second stage is usually called the 'Abstract Syntax Tree' or AST. 
 #
 # The stages are completely decoupled; You can change your grammar around and
 # use the second stage to isolate the rest of your code from the changes
 # you've effected. 
 #
 # == Further reading
 # 
 # All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
 # look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
 # {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
 # {Parslet::Atoms::Alternative}.
 #
 # == When things go wrong
 #
 # A parse that fails will raise {Parslet::ParseFailed}. This exception contains
 # all the details of what went wrong, including a detailed error trace that 
 # can be printed out as an ascii tree. ({Parslet::Cause})
 #
 module Parslet
  # Extends classes that include Parslet with the module
  # {Parslet::ClassMethods}.
  #
  def self.included(base)
    base.extend(ClassMethods)
  end
  # Raised when the parse failed to match. It contains the message that should
  # be presented to the user. More details can be extracted from the
  # exceptions #cause member: It contains an instance of {Parslet::Cause} that
  # stores all the details of your failed parse in a tree structure. 
  #
  #   begin
  #     parslet.parse(str)
  #   rescue Parslet::ParseFailed => failure
  #     puts failure.cause.ascii_tree
  #   end
  #
  # Alternatively, you can just require 'parslet/convenience' and call the
  # method #parse_with_debug instead of #parse. This method will never raise
  # and print error trees to stdout.
  #
  #   require 'parslet/convenience'
  #   parslet.parse_with_debug(str)
  #
  class ParseFailed < StandardError
    def initialize(message, cause=nil)
      super(message)
      @cause = cause
    end
    # Why the parse failed. 
    #
    # @return [Parslet::Cause]
    attr_reader :cause 
  end
  module ClassMethods
    # Define an entity for the parser. This generates a method of the same
    # name that can be used as part of other patterns. Those methods can be
    # freely mixed in your parser class with real ruby methods.
    # 
    #   class MyParser
    #     include Parslet
    #
    #     rule(:bar) { str('bar') }
    #     rule(:twobar) do
    #       bar >> bar
    #     end
    #
    #     root :twobar
    #   end
    #
    def rule(name, &definition)
      define_method(name) do
        @rules ||= {}     # <name, rule> memoization
        return @rules[name] if @rules.has_key?(name)
        # Capture the self of the parser class along with the definition.
        definition_closure = proc {
          self.instance_eval(&definition)
        }
        @rules[name] = Atoms::Entity.new(name, &definition_closure)
      end
    end
  end
  # Allows for delayed construction of #match. See also Parslet.match.
  #
  # @api private
  class DelayedMatchConstructor
    def [](str)
      Atoms::Re.new("[" + str + "]")
    end
  end
  # Returns an atom matching a character class. All regular expressions can be
  # used, as long as they match only a single character at a time. 
  #
  #   match('[ab]')     # will match either 'a' or 'b'
  #   match('[\n\s]')   # will match newlines and spaces
  #
  # There is also another (convenience) form of this method: 
  #
  #   match['a-z']      # synonymous to match('[a-z]')
  #   match['\n']       # synonymous to match('[\n]')
  #
  # @overload match(str)
  #   @param str [String] character class to match (regexp syntax)
  #   @return [Parslet::Atoms::Re] a parslet atom
  #
  def match(str=nil)
    return DelayedMatchConstructor.new unless str
    return Atoms::Re.new(str)
  end
  module_function :match
  # Returns an atom matching the +str+ given:
  #
  #   str('class')      # will match 'class' 
  #
  # @param str [String] string to match verbatim
  # @return [Parslet::Atoms::Str] a parslet atom
  # 
  def str(str)
    Atoms::Str.new(str)
  end
  module_function :str
  # Returns an atom matching any character. It acts like the '.' (dot)
  # character in regular expressions.
  #
  #   any.parse('a')    # => 'a'
  #
  # @return [Parslet::Atoms::Re] a parslet atom
  #
  def any
    Atoms::Re.new('.')
  end
  module_function :any
  # Introduces a new capture scope. This means that all old captures stay
  # accessible, but new values stored will only be available during the block
  # given and the old values will be restored after the block. 
  #
  # Example: 
  #   # :a will be available until the end of the block. Afterwards, 
  #   # :a from the outer scope will be available again, if such a thing 
  #   # exists. 
  #   scope { str('a').capture(:a) }
  #
  def scope(&block)
    Parslet::Atoms::Scope.new(block)
  end
  module_function :scope
  # Designates a piece of the parser as being dynamic. Dynamic parsers can
  # either return a parser at runtime, which will be applied on the input, or
  # return a result from a parse. 
  # 
  # Dynamic parse pieces are never cached and can introduce performance
   # abnormalities - use sparingly where other constructs fail. 
  # 
  # Example: 
  #   # Parses either 'a' or 'b', depending on the weather
  #   dynamic { rand() < 0.5 ? str('a') : str('b') }
  #   
  def dynamic(&block)
    Parslet::Atoms::Dynamic.new(block)
  end
  module_function :dynamic
  # Returns a parslet atom that parses infix expressions. Operations are 
  # specified as a list of <atom, precedence, associativity> tuples, where 
  # atom is simply the parslet atom that matches an operator, precedence is 
  # a number and associativity is either :left or :right. 
  # 
  # Higher precedence indicates that the operation should bind tighter than
  # other operations with lower precedence. In common algebra, '+' has 
  # lower precedence than '*'. So you would have a precedence of 1 for '+' and
  # a precedence of 2 for '*'. Only the order relation between these two 
  # counts, so any number would work. 
  #
  # Associativity is what decides what interpretation to take for strings that
  # are ambiguous like '1 + 2 + 3'. If '+' is specified as left associative, 
  # the expression would be interpreted as '(1 + 2) + 3'. If right 
  # associativity is chosen, it would be interpreted as '1 + (2 + 3)'. Note 
  # that the hash trees output reflect that choice as well. 
  #
  # Example:
  #   infix_expression(integer, [add_op, 1, :left])
  #   # would parse things like '1 + 2'
  #
  # @param element [Parslet::Atoms::Base] elements that take the NUMBER position
  #    in the expression
  # @param operations [Array<(Parslet::Atoms::Base, Integer, {:left, :right})>]
  #  
  # @see Parslet::Atoms::Infix
  #
  def infix_expression(element, *operations)
    Parslet::Atoms::Infix.new(element, operations)
  end
  module_function :infix_expression
  # A special kind of atom that allows embedding whole treetop expressions
  # into parslet construction. 
  #
  #   # the same as str('a') >> str('b').maybe
  #   exp(%Q("a" "b"?))     
  #
  # @param str [String] a treetop expression
  # @return [Parslet::Atoms::Base] the corresponding parslet parser
  #
  def exp(str)
    Parslet::Expression.new(str).to_parslet
  end
  module_function :exp
  # Returns a placeholder for a tree transformation that will only match a
  # sequence of elements. The +symbol+ you specify will be the key for the
  # matched sequence in the returned dictionary.
  #
  #   # This would match a body element that contains several declarations.
  #   { :body => sequence(:declarations) }
  #
  # The above example would match <code>:body => ['a', 'b']</code>, but not
  # <code>:body => 'a'</code>. 
  #
  # see {Parslet::Transform}
  #
  def sequence(symbol)
    Pattern::SequenceBind.new(symbol)
  end
  module_function :sequence
  # Returns a placeholder for a tree transformation that will only match
  # simple elements. This matches everything that <code>#sequence</code>
  # doesn't match.
  #
  #   # Matches a single header. 
  #   { :header => simple(:header) }
  #
  # see {Parslet::Transform}
  #
  def simple(symbol)
    Pattern::SimpleBind.new(symbol)
  end
  module_function :simple
  # Returns a placeholder for tree transformation patterns that will match 
  # any kind of subtree. 
  #
  #   { :expression => subtree(:exp) }
  #
  def subtree(symbol)
    Pattern::SubtreeBind.new(symbol)
  end
  module_function :subtree
  autoload :Expression, 'parslet/expression'
 end
 require 'parslet/slice'
 require 'parslet/cause'
 require 'parslet/source'
 require 'parslet/atoms'
 require 'parslet/pattern'
 require 'parslet/pattern/binding'
 require 'parslet/transform'
 require 'parslet/parser'
 require 'parslet/error_reporter'
 require 'parslet/scope'
--- a/lib/parslet/accelerator.rb
+++ b/lib/parslet/accelerator.rb
@@ -0,0 +1,161 @@
 # Optimizes the parsers by pattern matching on the parser atoms and replacing
 # matches with better versions. See the file qed/accelerators.md for a more
 # in-depth description.
 #
 # Example: 
 #   quote = str('"')
 #   parser = quote >> (quote.absent? >> any).repeat >> quote
 #
 #   A = Accelerator # for making what follows a bit shorter
 #   optimized_parser = A.apply(parser, 
 #     A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
 #
 #   optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
 #
 module Parslet::Accelerator
  # An expression to match against a tree of parser atoms. Normally, an
  # expression is produced by Parslet::Accelerator.any, 
  # Parslet::Accelerator.str or Parslet::Accelerator.re.
  #
  # Expressions can be chained much like parslet atoms can be: 
  #
  #   expr.repeat(1)      # matching repetition
  #   expr.absent?        # matching absent?
  #   expr.present?       # matching present?
  #   expr1 >> expr2      # matching a sequence
  #   expr1 | expr2       # matching an alternation
  # 
  # @see Parslet::Accelerator.str
  # @see Parslet::Accelerator.re
  # @see Parslet::Accelerator.any
  #
  # @see Parslet::Accelerator
  # 
  class Expression
    attr_reader :type
    attr_reader :args
    def initialize(type, *args)
      @type = type
      @args = args
    end
    # @return [Expression]
    def >> other_expr
      join_or_new :seq, other_expr
    end
    # @return [Expression]
    def | other_expr
      join_or_new :alt, other_expr
    end
    # @return [Expression]
    def absent?
      Expression.new(:absent, self)
    end
    # @return [Expression]
    def present?
      Expression.new(:present, self)
    end
    # @return [Expression]
    def repeat min=0, max=nil
      Expression.new(:rep, min, max, self)
    end
    # @return [Expression]
    def as name
      Expression.new(:as, name)
    end
    # @api private
    # @return [Expression]
    def join_or_new tag, other_expr
      if type == tag
        @args << other_expr
      else
        Expression.new(tag, self, other_expr)
      end
    end
  end
 module_function 
  # Returns a match expression that will match `str` parslet atoms.
  #
  # @return [Parslet::Accelerator::Expression]
  #
  def str variable, *constraints
    Expression.new(:str, variable, *constraints)
  end
  # Returns a match expression that will match `match` parslet atoms.
  #
  # @return [Parslet::Accelerator::Expression]
  #
  def re variable, *constraints
    Expression.new(:re, variable, *constraints)
  end
  # Returns a match expression that will match `any` parslet atoms.
  #
  # @return [Parslet::Accelerator::Expression]
  #
  def any
    Expression.new(:re, ".")
  end
  # Given a parslet atom and an expression, will determine if the expression
  # matches the atom. If successful, returns the bindings into the pattern
  # that were made. If no bindings had to be made to make the match successful, 
  # the empty hash is returned. 
  #
  # @param atom [Parslet::Atoms::Base] parslet atom to match against
  # @param expr [Parslet::Accelerator::Expression] expression to match
  # @return [nil, Hash] bindings for the match, nil on failure
  #
  def match atom, expr
    engine = Engine.new
    return engine.bindings if engine.match(atom, expr)
  end
  # Constructs an accelerator rule. A rule is a matching expression and the
  # code that should be executed once the expression could be bound to a 
  # parser. 
  #
  # Example: 
  #   Accelerator.rule(Accelerator.any) { Parslet.match('.') }
  #
  def rule expression, &action
    [expression, action]
  end
  # Given a parslet atom and a set of rules, tries to match the rules 
  # recursively through the parslet atom. Once a rule could be matched, 
  # its action block will be called.
  #
  # Example: 
  #   quote = str('"')
  #   parser = quote >> (quote.absent? >> any).repeat >> quote
  #
  #   A = Accelerator # for making what follows a bit shorter
  #   optimized_parser = A.apply(parser, 
  #     A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
  #
  #   optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
  #
  # @param atom [Parslet::Atoms::Base] a parser to optimize
  # @param *rules [Parslet::Accelerator::Rule] rules produced by .rule
  # @return [Parslet::Atoms::Base] optimized parser
  #
  def apply atom, *rules
    Application.new(atom, rules).call
  end
 end
 require 'parslet/accelerator/engine'
 require 'parslet/accelerator/application'
--- a/lib/parslet/accelerator/application.rb
+++ b/lib/parslet/accelerator/application.rb
@@ -0,0 +1,62 @@
 # @api private
 module Parslet::Accelerator
  class Application
    def initialize atom, rules
      @atom = atom
      @rules = rules
    end
    def call
      @atom.accept(self)
    end
    def visit_parser(root)
      transform root.accept(self)
    end
    def visit_entity(name, block)
      transform Parslet::Atoms::Entity.new(name) { block.call.accept(self) }
    end
    def visit_named(name, atom)
      transform Parslet::Atoms::Named.new(atom.accept(self), name)
    end
    def visit_repetition(tag, min, max, atom)
      transform Parslet::Atoms::Repetition.new(atom.accept(self), min, max, tag)
    end
    def visit_alternative(alternatives)
      transform Parslet::Atoms::Alternative.new(
        *alternatives.map { |atom| atom.accept(self) })
    end
    def visit_sequence(sequence)
      transform Parslet::Atoms::Sequence.new(
        *sequence.map { |atom| atom.accept(self) })
    end
    def visit_lookahead(positive, atom)
      transform Parslet::Atoms::Lookahead.new(atom, positive)
    end
    def visit_re(regexp)
      transform Parslet::Atoms::Re.new(regexp)
    end
    def visit_str(str)
      transform Parslet::Atoms::Str.new(str)
    end
    def transform atom
      @rules.each do |expr, action|
        # Try and match each rule in turn
        binding = Parslet::Accelerator.match(atom, expr)
        if binding
          # On a successful match, allow the rule action to transform the
          # parslet into something new. 
          ctx = Parslet::Context.new(binding)
          return ctx.instance_eval(&action)
        end
      end # rules.each 
      # If no rule matches, this is the fallback - a clean new parslet atom.
      return atom
    end
  end
 end
 require 'parslet/context'
--- a/lib/parslet/accelerator/engine.rb
+++ b/lib/parslet/accelerator/engine.rb
@@ -0,0 +1,112 @@
 require 'parslet/atoms/visitor'
 module Parslet::Accelerator
  # @api private
  class Apply
    def initialize(engine, expr)
      @engine = engine
      @expr = expr
    end
    def visit_parser(root)
      false
    end
    def visit_entity(name, block)
      false
    end
    def visit_named(name, atom)
      match(:as) do |key|
        @engine.try_bind(key, name)
      end
    end
    def visit_repetition(tag, min, max, atom)
      match(:rep) do |e_min, e_max, expr|
        e_min == min && e_max == max && @engine.match(atom, expr)
      end
    end
    def visit_alternative(alternatives)
      match(:alt) do |*expressions|
        return false if alternatives.size != expressions.size
        alternatives.zip(expressions).all? do |atom, expr|
          @engine.match(atom, expr)
        end
      end
    end
    def visit_sequence(sequence)
      match(:seq) do |*expressions|
        return false if sequence.size != expressions.size
        sequence.zip(expressions).all? do |atom, expr|
          @engine.match(atom, expr)
        end
      end
    end
    def visit_lookahead(positive, atom)
      match(:absent) do |expr|
        return positive == false && @engine.match(atom, expr)
      end
      match(:present) do |expr|
        return positive == true && @engine.match(atom, expr)
      end
    end
    def visit_re(regexp)
      match(:re) do |*bind_conditions|
        bind_conditions.all? { |bind_cond| 
          @engine.try_bind(bind_cond, regexp) }
      end
    end
    def visit_str(str)
      match(:str) do |*bind_conditions|
        bind_conditions.all? { |bind_cond| 
          @engine.try_bind(bind_cond, str) }
      end
    end
    def match(type_tag)
      expr_tag = @expr.type
      if expr_tag == type_tag
        yield *@expr.args
      end
    end
  end
  # @api private
  class Engine
    attr_reader :bindings
    def initialize 
      @bindings = {}
    end
    def match(atom, expr)
      atom.accept(
        Apply.new(self, expr))
    end
    def try_bind(variable, value)
      if bound? variable
        return value == lookup(variable)
      else
        case variable
          when Symbol
            bind(variable, value)
        else
          # This does not look like a variable - let's try matching it against
          # the value: 
          variable === value
        end    
      end
    end
    def bound? var
      @bindings.has_key? var
    end
    def lookup var
      @bindings[var]
    end
    def bind var, val
      @bindings[var] = val
    end
  end
 end
--- a/lib/parslet/atoms.rb
+++ b/lib/parslet/atoms.rb
@@ -0,0 +1,35 @@
 # This is where parslet's name comes from: Small parser atoms.
 #
 module Parslet::Atoms
  # The precedence module controls parenthesis during the #inspect printing
  # of parslets. It is not relevant to other aspects of the parsing. 
  #
  module Precedence
    prec = 0
    BASE       = (prec+=1)    # everything else
    LOOKAHEAD  = (prec+=1)    # &SOMETHING
    REPETITION = (prec+=1)    # 'a'+, 'a'?
    SEQUENCE   = (prec+=1)    # 'a' 'b'
    ALTERNATE  = (prec+=1)    # 'a' | 'b'
    OUTER      = (prec+=1)    # printing is done here.
  end
  require 'parslet/atoms/can_flatten'
  require 'parslet/atoms/context'
  require 'parslet/atoms/dsl'
  require 'parslet/atoms/base'
  require 'parslet/atoms/named'
  require 'parslet/atoms/lookahead'
  require 'parslet/atoms/alternative'
  require 'parslet/atoms/sequence'
  require 'parslet/atoms/repetition'
  require 'parslet/atoms/re'
  require 'parslet/atoms/str'
  require 'parslet/atoms/entity'
  require 'parslet/atoms/capture'
  require 'parslet/atoms/dynamic'
  require 'parslet/atoms/scope'
  require 'parslet/atoms/infix'
 end
--- a/lib/parslet/atoms/alternative.rb
+++ b/lib/parslet/atoms/alternative.rb
@@ -0,0 +1,50 @@
 # Alternative during matching. Contains a list of parslets that is tried each
 # one in turn. Only fails if all alternatives fail. 
 #
 # Example: 
 # 
 #   str('a') | str('b')   # matches either 'a' or 'b'
 #
 class Parslet::Atoms::Alternative < Parslet::Atoms::Base
   # @return [Array] the parslets that are tried, in order
   attr_reader :alternatives
   # Builds an Alternative from the given parslets, keeping their order.
   # This is what '|' produces:
   #
   #   str('a') | str('b')
   #
   def initialize(*alternatives)
     super()
     @alternatives = alternatives
     @error_msg = "Expected one of #{alternatives.inspect}"
   end
   #---
   # Appending to an existing Alternative yields one flat Alternative rather
   # than a nested tree, which keeps the number of objects down.
   #+++
   def |(parslet)
     self.class.new(*(@alternatives + [parslet]))
   end
   # Applies the alternatives one after the other and returns the result of
   # the first one that succeeds. If none does, the collected failure values
   # are reported through the context.
   def try(source, context, consume_all)
     errors = []
     alternatives.each do |alternative|
       result = alternative.apply(source, context, consume_all)
       success, value = result
       return result if success
       errors << value
     end
     # Every alternative has failed.
     context.err(self, source, @error_msg, errors)
   end
   precedence ALTERNATE
   def to_s_inner(prec)
     printed = alternatives.map { |alternative| alternative.to_s(prec) }
     printed.join(' / ')
   end
 end
--- a/lib/parslet/atoms/base.rb
+++ b/lib/parslet/atoms/base.rb
@@ -0,0 +1,151 @@
 # Base class for all parslets, handles orchestration of calls and implements
 # a lot of the operator and chaining methods.
 #
 # Also see Parslet::Atoms::DSL chaining parslet atoms together.
 #
 class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence
  include Parslet::Atoms::DSL
  include Parslet::Atoms::CanFlatten
  # Given a string or an IO object, this will attempt a parse of its contents
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
  # will be thrown. 
  #
  # @param io [String, Source] input for the parse process
  # @option options [Parslet::ErrorReporter] :reporter error reporter to use, 
  #   defaults to Parslet::ErrorReporter::Tree 
  # @option options [Boolean] :prefix Should a prefix match be accepted? 
  #   (default: false)
  # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
  #   tree
  #
  def parse(io, options={})
    source = io.respond_to?(:line_and_column) ? 
      io : 
      Parslet::Source.new(io)
    # Try to cheat. Assuming that we'll be able to parse the input, don't 
    # run error reporting code. 
    success, value = setup_and_apply(source, nil, !options[:prefix])
    # If we didn't succeed the parse, raise an exception for the user. 
    # Stack trace will be off, but the error tree should explain the reason
    # it failed.
    unless success
      # Cheating has not paid off. Now pay the cost: Rerun the parse,
      # gathering error information in the process.
      reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
      source.pos = 0
      success, value = setup_and_apply(source, reporter, !options[:prefix])
      fail "Assertion failed: success was true when parsing with reporter" \
        if success
      # Value is a Parslet::Cause, which can be turned into an exception:
      value.raise
      fail "NEVER REACHED"
    end
    # assert: success is true
    # Extra input is now handled inline with the rest of the parsing. If 
    # really we have success == true, prefix: false and still some input 
    # is left dangling, that is a BUG.
    if !options[:prefix] && source.chars_left > 0
      fail "BUG: New error strategy should not reach this point."
    end
    return flatten(value)
  end
  # Creates a context for parsing and applies the current atom to the input. 
  # Returns the parse result. 
  #
  # @return [<Boolean, Object>] Result of the parse. If the first member is 
  #   true, the parse has succeeded. 
  def setup_and_apply(source, error_reporter, consume_all)
    context = Parslet::Atoms::Context.new(error_reporter)
    apply(source, context, consume_all)
  end
  # Calls the #try method of this parslet. Success consumes input, error will 
  # rewind the input. 
  #
  # @param source [Parslet::Source] source to read input from
  # @param context [Parslet::Atoms::Context] context to use for the parsing
  # @param consume_all [Boolean] true if the current parse must consume
  #   all input by itself.
  def apply(source, context, consume_all=false)
    old_pos = source.pos
    success, value = result = context.try_with_cache(self, source, consume_all)
    if success
      # If a consume_all parse was made and doesn't result in the consumption
      # of all the input, that is considered an error. 
      if consume_all && source.chars_left>0
        # Read 10 characters ahead. Why ten? I don't know. 
        offending_pos   = source.pos
        offending_input = source.consume(10)
        # Rewind input (as happens always in error case)
        source.pos      = old_pos
        return context.err_at(
          self, 
          source, 
          "Don't know what to do with #{offending_input.to_s.inspect}", 
          offending_pos
        ) 
      end
      # Looks like the parse was successful after all. Don't rewind the input.
      return result
    end
    # We only reach this point if the parse has failed. Rewind the input.
    source.pos = old_pos
    return result
  end
  # Override this in your Atoms::Base subclasses to implement parsing
  # behaviour. 
  #
  def try(source, context, consume_all)
    raise NotImplementedError, \
      "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  end
  # Returns true if this atom can be cached in the packrat cache. Most parslet
  # atoms are cached, so this always returns true, unless overridden.
  #
  def cached?
    true
  end
  # Debug printing - in Treetop syntax. 
  #
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE
  def to_s(outer_prec=OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end
  def inspect
    to_s(OUTER)
  end
 private
  # Produces an instance of Success and returns it. 
  #
  def succ(result)
    [true, result]
  end
 end
--- a/lib/parslet/atoms/can_flatten.rb
+++ b/lib/parslet/atoms/can_flatten.rb
@@ -0,0 +1,137 @@
 module Parslet::Atoms
  # A series of helper functions that have the common topic of flattening 
  # result values into the intermediary tree that consists of Ruby Hashes and 
  # Arrays. 
  #
  # This module has one main function, #flatten, that takes an annotated 
  # structure as input and returns the reduced form that users expect from 
  # Atom#parse. 
  #
  # NOTE: Since all of these functions are just that, functions without 
   # side effects, they are in a module and not in a class. It's hard to draw 
  # the line sometimes, but this is beyond. 
  #
  module CanFlatten
    # Takes a mixed value coming out of a parslet and converts it to a return
    # value for the user by dropping things and merging hashes. 
    #
    # Named is set to true if this result will be embedded in a Hash result from 
    # naming something using <code>.as(...)</code>. It changes the folding 
    # semantics of repetition.
    #
    def flatten(value, named=false)
      # Passes through everything that isn't an array of things
      return value unless value.instance_of? Array
      # Extracts the s-expression tag
      tag, *tail = value
      # Merges arrays:
      result = tail.
        map { |e| flatten(e) }            # first flatten each element
      case tag
        when :sequence
          return flatten_sequence(result)
        when :maybe
          return named ? result.first : result.first || ''
        when :repetition
          return flatten_repetition(result, named)
      end
      fail "BUG: Unknown tag #{tag.inspect}."
    end
    # Lisp style fold left where the first element builds the basis for 
    # an inject. 
    #
    def foldl(list, &block)
      return '' if list.empty?
      list[1..-1].inject(list.first, &block)
    end
    # Flatten results from a sequence of parslets. 
    #
    # @api private
    #
    def flatten_sequence(list)
      foldl(list.compact) { |r, e|        # and then merge flat elements
        merge_fold(r, e)
      }
    end
    # @api private 
    def merge_fold(l, r)
      # equal pairs: merge. ----------------------------------------------------
      if l.class == r.class
        if l.is_a?(Hash)
          warn_about_duplicate_keys(l, r)
          return l.merge(r)
        else
          return l + r
        end
      end
      # unequal pairs: hoist to same level. ------------------------------------
      # Maybe classes are not equal, but both are stringlike?
      if l.respond_to?(:to_str) && r.respond_to?(:to_str)
        # if we're merging a String with a Slice, the slice wins. 
        return r if r.respond_to? :to_slice
        return l if l.respond_to? :to_slice
        fail "NOTREACHED: What other stringlike classes are there?"
      end
      # special case: If one of them is a string/slice, the other is more important 
      return l if r.respond_to? :to_str
      return r if l.respond_to? :to_str
      # otherwise just create an array for one of them to live in 
      return l + [r] if r.class == Hash
      return [l] + r if l.class == Hash
      fail "Unhandled case when foldr'ing sequence."
    end
    # Flatten results from a repetition of a single parslet. named indicates
    # whether the user has named the result or not. If the user has named
    # the results, we want to leave an empty list alone - otherwise it is 
    # turned into an empty string. 
    #
    # @api private
    #
    def flatten_repetition(list, named)
      if list.any? { |e| e.instance_of?(Hash) }
        # If keyed subtrees are in the array, we'll want to discard all 
        # strings inbetween. To keep them, name them. 
        return list.select { |e| e.instance_of?(Hash) }
      end
      if list.any? { |e| e.instance_of?(Array) }
        # If any arrays are nested in this array, flatten all arrays to this
        # level. 
        return list.
          select { |e| e.instance_of?(Array) }.
          flatten(1)
      end
      # Consistent handling of empty lists, when we act on a named result        
      return [] if named && list.empty?
      # If there are only strings, concatenate them and return that. 
      foldl(list) { |s,e| s+e }
    end
    # That annoying warning 'Duplicate subtrees while merging result' comes 
    # from here. You should add more '.as(...)' names to your intermediary tree.
    #
    def warn_about_duplicate_keys(h1, h2)
      d = h1.keys & h2.keys
      unless d.empty?
        warn "Duplicate subtrees while merging result of \n  #{self.inspect}\nonly the values"+
             " of the latter will be kept. (keys: #{d.inspect})"
      end
    end
  end
 end
--- a/lib/parslet/atoms/capture.rb
+++ b/lib/parslet/atoms/capture.rb
@@ -0,0 +1,38 @@
 # Stores the result of matching an atom against input in the #captures in 
 # parse context. Doing so will allow you to pull parts of the ongoing parse
 # out later and use them to match other pieces of input. 
 #
 # Example: 
 #   # After this, context.captures[:an_a] returns 'a'
 #   str('a').capture(:an_a)
 #
 #   # Capture and use of the capture: (matches either 'aa' or 'bb')
 #   match['ab'].capture(:first) >> 
 #     dynamic { |src, ctx| str(ctx.captures[:first]) }
 #   
 class Parslet::Atoms::Capture < Parslet::Atoms::Base
  attr_reader :parslet, :name
  # @param parslet the wrapped atom whose result is captured
  # @param name name under which the result is stored (converted with
  #   #to_sym when storing)
  def initialize(parslet, name)
    super()
    @parslet = parslet
    @name = name
  end
  # Applies the wrapped atom. On success the flattened result is stored in
  # context.captures under the capture name. The wrapped atom's result is
  # returned unchanged in either case.
  #
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    matched, tree = outcome
    context.captures[name.to_sym] = flatten(tree) if matched
    outcome
  end
  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
 end
--- a/lib/parslet/atoms/context.rb
+++ b/lib/parslet/atoms/context.rb
@@ -0,0 +1,91 @@
 module Parslet::Atoms
  # Per-parse state shared by the atoms: a transient cache that maps input
  # position and parslet object to results (memoization in the packrat
  # style), the error reporter and the captures. 
  #
  # Error reporting goes through this class, which makes the reporting
  # pluggable. 
  #
  class Context
    # Returns the current captures made on the input (see
    # Parslet::Atoms::Base#capture). Use as follows: 
    # 
    #   context.captures[:foobar] # => returns capture :foobar
    #
    attr_reader :captures
    # @param reporter [#err, #err_at] Error reporter (leave empty for default 
    #   reporter)
    def initialize(reporter=Parslet::ErrorReporter::Tree.new)
      @reporter = reporter
      @captures = Parslet::Scope.new
      # position => { parslet => [result, characters advanced] }
      @cache = Hash.new { |by_pos, pos| by_pos[pos] = {} }
    end
    # Returns the result of applying obj at source.pos, consulting the cache
    # first. Applying the same parslet at one position of input always
    # yields the same result, unless the input has changed. 
    #
    # The source is needed so that a cache hit can advance the input 
    # position by the same amount the original parse did. 
    #
    def try_with_cache(obj, source, consume_all)
      start = source.pos
      cached = lookup(obj, start)
      if cached
        # This stretch of input has been read before and obj's result at 
        # this position is known, so skip ahead instead of reading it again.
        outcome, advance = cached
        source.pos = start + advance
        return outcome
      end
      # Cache miss: run the parslet and remember the result if it allows it.
      outcome = obj.try(source, self, consume_all)
      set(obj, start, [outcome, source.pos - start]) if obj.cached?
      outcome
    end  
    # Report an error at a given position. Returns a failure pair of the form
    # [false, cause]; cause is nil when no reporter is set. 
    # @see ErrorReporter
    #
    def err_at(*args)
      cause = @reporter ? @reporter.err_at(*args) : nil
      [false, cause]
    end
    # Report an error. Returns a failure pair of the form [false, cause];
    # cause is nil when no reporter is set. 
    # @see ErrorReporter
    #
    def err(*args)
      cause = @reporter ? @reporter.err(*args) : nil
      [false, cause]
    end
    # Starts a new scope for the duration of the block given. Use the #scope
    # method of Parslet::Atoms::DSL to call this. 
    #
    def scope
      captures.push
      yield
    ensure
      # Leave the scope even when the block raises or returns early.
      captures.pop
    end
  private 
    def lookup(obj, pos)
      @cache[pos][obj] 
    end
    def set(obj, pos, val)
      @cache[pos][obj] = val
    end
  end
 end
--- a/lib/parslet/atoms/dsl.rb
+++ b/lib/parslet/atoms/dsl.rb
@@ -0,0 +1,109 @@
 # A mixin module that defines operations that can be called on any subclass
 # of Parslet::Atoms::Base. These operations make parslets atoms chainable and 
 # allow combination of parslet atoms to form bigger parsers.
 #
 # Example: 
 #
 #   str('foo') >> str('bar')
 #   str('f').repeat
 #   any.absent?               # also called The Epsilon
 #
 module Parslet::Atoms::DSL
  # Wraps this atom in a repetition that must match at least min times and
  # at most max times. A nil max means there is no upper bound. 
  #
  # Example: 
  #   str('a').repeat       # any number of 'a's
  #   str('a').repeat(1,3)  # between 1 and 3 'a's
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end
  # Makes this atom optional. Built as a repetition with bounds 0 and 1 that
  # carries the tag :maybe instead of the default repetition tag. 
  #
  # Example: 
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end
  # Sequence: this atom followed by the parslet given. 
  #
  # Example: 
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end
  # Alternation: this atom is the left side, the parslet given the right
  # side. The left side is attempted first; the right side is tried if it
  # doesn't match. 
  #
  # Example:
  #   str('a') | str('b')   # matches either 'a' OR 'b'
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end
  # Negative lookahead: tests that this atom is absent from the input
  # without consuming anything. 
  # 
  # Example: 
  #   str('a').absent?    # only proceed if 'a' is absent
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end
  # Positive lookahead: tests that this atom is present in the input without
  # consuming anything. 
  # 
  # Example: 
  #   str('a').present?   # only proceed if 'a' is present
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end
  # Deprecated alias for present? that will disappear in 2.0.
  #
  alias_method :prsnt?, :present?
  # Deprecated alias for absent? that will disappear in 2.0.
  #
  alias_method :absnt?, :absent?
  # Names the result of this atom so that it shows up in the tree output. 
  # This must be used to achieve meaningful output from the #parse method. 
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end
  # Stores the result of this atom (which may be complex) under the name
  # given whenever it parses successfully. Useful for self-referential
  # parses. 
  # 
  # Example: 
  #   str('a').capture(:b)  # will store captures[:b] == 'a'
  # 
  def capture(name)
    Parslet::Atoms::Capture.new(self, name)
  end
 end
--- a/lib/parslet/atoms/dynamic.rb
+++ b/lib/parslet/atoms/dynamic.rb
@@ -0,0 +1,32 @@
 # Evaluates a block at parse time. The result from the block must be a parser
 # (something which implements #apply). That parser is then applied to the
 # input, creating the result. 
 #
 # Dynamic parses are never cached. 
 #
 # Example: 
 #   dynamic { rand < 0.5 ? str('a') : str('b') }
 #
 class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
  attr_reader :block
  # @param block [#call] callable invoked as block.call(source, context) on
  #   every parse attempt; it must return a parslet atom
  def initialize(block)
    # Run the base class initializer, as every other atom does.
    super()
    @block = block
  end
  # The block may answer differently on every call, so results are never
  # cached.
  def cached?
    false
  end
  # Calls the block with the source and the context, then applies the atom
  # it returned and returns that atom's result.
  def try(source, context, consume_all)
    atom = block.call(source, context)
    atom.apply(source, context, consume_all)
  end
  def to_s_inner(prec)
    "dynamic { ... }"
  end
 end
--- a/lib/parslet/atoms/entity.rb
+++ b/lib/parslet/atoms/entity.rb
@@ -0,0 +1,41 @@
 # This wraps pieces of parslet definition and gives them a name. The wrapped
 # piece is lazily evaluated and cached. This has two purposes: 
 #     
 # * Avoid infinite recursion during evaluation of the definition
 # * Be able to print things by their name, not by their sometimes
 #   complicated content.
 #
 # You don't normally use this directly; instead you should generate it by
 # using the structuring method Parslet.rule.
 #
 class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block
  # @param name name of the rule, used when printing
  # @param block block that produces the wrapped parslet when first needed
  def initialize(name, &block)
    super()
    @name = name
    @block = block
  end
  # Delegates to the wrapped parslet.
  def try(source, context, consume_all)
    parslet.apply(source, context, consume_all)
  end
  # Returns the wrapped parslet. The block is evaluated on first use and its
  # result is kept for later calls. Raises NotImplementedError when the
  # block returns nil or false.
  def parslet
    return @parslet if @parslet
    built = @block.call
    raise_not_implemented unless built
    @parslet = built
  end
  def to_s_inner(prec)
    name.to_s.upcase
  end  
 private 
  # Raises NotImplementedError with a backtrace from which all frames of
  # this file have been removed (idea taken from dependencies.rb in
  # activesupport).
  def raise_not_implemented
    outside_frames = caller.reject { |frame| frame.include?(__FILE__) }
    exception = NotImplementedError.new("rule(#{name.inspect}) { ... }  returns nil. Still not implemented, but already used?")
    exception.set_backtrace(outside_frames)
    raise exception
  end
 end
--- a/lib/parslet/atoms/infix.rb
+++ b/lib/parslet/atoms/infix.rb
@@ -0,0 +1,121 @@
 class Parslet::Atoms::Infix < Parslet::Atoms::Base
  attr_reader :element, :operations
  # @param element atom that matches a single operand
  # @param operations list of [operator atom, precedence, associativity]
  #   entries; an associativity of :left is treated as left-associative,
  #   anything else as right-associative
  def initialize(element, operations)
    super()
    @element = element
    @operations = operations
  end
  # Parses an infix expression and returns the result tree wrapped by succ.
  # If an operand is missing, the error thrown by #abort is returned instead.
  def try(source, context, consume_all)
    catch_error do
      nested = precedence_climb(source, context, consume_all)
      succ(produce_tree(nested))
    end
  end
  # Turns an array of the form ['1', '+', ['2', '*', '3']] into a hash that
  # reflects the same structure. Anything that is not an array is returned
  # as is. Note that the array given is consumed (emptied) in the process.
  #
  def produce_tree(ary)
    return ary unless ary.kind_of? Array
    left = ary.shift
    until ary.empty?
      op, right = ary.shift(2)
      # Subexpressions (arrays) become subhashes, plain values pass through.
      left = {l: left, o: op, r: produce_tree(right)}
    end
    left
  end
  # A precedence climbing algorithm married to parslet, as described here
  #   http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
  # 
  # @note Error handling in this routine is done by throwing :error and 
  #       as a value the error to return to parslet. This avoids cluttering
  #       the recursion logic here with parslet error handling. 
  #
  def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
    # An expression always starts with one @element. 
    matched, tree = @element.apply(source, context, false)
    unless matched
      abort context.err(self, source, "#{@element.inspect} was expected", [tree])
    end
    collected = [flatten(tree, true)]
    loop do
      rewind_to = source.pos
      operator, prec, assoc = match_operation(source, context, false)
      # No operator here: end of file, end of expression or a syntax error.
      # In all these cases the expression ends. 
      break unless operator
      if prec < current_prec
        # The operator binds less tightly than the current level: put it 
        # back and let an outer level handle it. 
        source.pos = rewind_to
        break
      end
      next_prec = (assoc == :left) ? prec+1 : prec
      collected << operator
      collected << precedence_climb(
        source, context, consume_all, next_prec, true)
    end
    unwrap(collected)
  end
  # Returns the sole element of a one-element list, the list itself
  # otherwise.
  def unwrap expr
    expr.size == 1 ? expr.first : expr
  end
  # Tries the operator atoms in order. Returns the flattened match together
  # with precedence and associativity of the first operator that matches,
  # nil if none does.
  def match_operation(source, context, consume_all)
    @operations.each do |atom, prec, assoc|
      matched, tree = atom.apply(source, context, consume_all)
      return [flatten(tree, true), prec, assoc] if matched
    end
    nil
  end
  # Leaves the recursion by throwing :error with the error given as value.
  def abort(error)
    throw :error, error
  end
  # Runs the block and returns its value, or the value thrown with :error.
  def catch_error
    catch(:error) { yield }
  end
  def to_s_inner(prec)
    ops = @operations.map { |atom, *| atom.inspect }.join(', ')
    "infix_expression(#{@element.inspect}, [#{ops}])"
  end
 end
--- a/lib/parslet/atoms/lookahead.rb
+++ b/lib/parslet/atoms/lookahead.rb
@@ -0,0 +1,49 @@
 # Either positive or negative lookahead, doesn't consume its input. 
 #
 # Example: 
 #
 #   str('foo').present? # matches when the input contains 'foo', but leaves it
 #
 class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet
  # @param bound_parslet atom to look ahead for
  # @param positive true for positive lookahead (atom must match), false
  #   for negative lookahead (atom must not match)
  def initialize(bound_parslet, positive=true)
    super()
    @positive = positive
    @bound_parslet = bound_parslet
    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet], 
      :negative => ["Input should not start with ", bound_parslet]
    }
  end
  # Applies the bound parslet and succeeds with nil when the outcome agrees
  # with the kind of lookahead; reports an error at the starting position
  # otherwise. 
  #
  # This is probably the only parslet that rewinds its input in #try.
  # Lookaheads NEVER consume their input, even on success, that's why. 
  #
  def try(source, context, consume_all)
    rewind_to = source.pos
    matched, _ = bound_parslet.apply(source, context, consume_all)
    return succ(nil) if positive ? matched : !matched
    kind = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[kind], rewind_to)
  ensure 
    source.pos = rewind_to
  end
  precedence LOOKAHEAD
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'
    "#{prefix}#{bound_parslet.to_s(prec)}"
  end
 end
--- a/lib/parslet/atoms/named.rb
+++ b/lib/parslet/atoms/named.rb
@@ -0,0 +1,32 @@
 # Names a match to influence tree construction. 
 #
 # Example: 
 #
 #   str('foo')            # will return 'foo', 
 #   str('foo').as(:foo)   # will return :foo => 'foo'
 #
 class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name
  # @param parslet the wrapped atom
  # @param name key under which the atom's result appears in the output
  def initialize(parslet, name)
    super()
    @parslet = parslet
    @name = name
  end
  # Applies the wrapped atom. A failure is passed through unchanged; a
  # success is replaced by a one-entry hash keyed by the name.
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    matched, tree = outcome
    return outcome unless matched
    succ(produce_return_value(tree))
  end
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end
 private
  # Builds { name => flattened value }.
  def produce_return_value(val)
    flattened = flatten(val, true)
    { name => flattened }
  end
 end
--- a/lib/parslet/atoms/re.rb
+++ b/lib/parslet/atoms/re.rb
@@ -0,0 +1,38 @@
 # Matches a special kind of regular expression that only ever matches one
 # character at a time. Useful members of this family are: <code>character
 # ranges, \\w, \\d, \\r, \\n, ...</code>
 #
 # Example: 
 #
 #   match('[a-z]')  # matches a-z
 #   match('\s')     # like regexps: matches space characters
 #
 class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re
  # @param match pattern source; its #to_s is compiled into a multiline
  #   regular expression
  def initialize(match)
    super()
    @match = match.to_s
    @re    = Regexp.new(self.match, Regexp::MULTILINE)
    @error_msgs = {
      :premature  => "Premature end of input", 
      :failed     => "Failed to match #{match.inspect[1..-2]}"
    }
  end
  # Consumes one character when the source matches the pattern at the
  # current position. Otherwise reports either premature end of input (no
  # characters left) or a failed match.
  def try(source, context, consume_all)
    return succ(source.consume(1)) if source.matches?(@re)
    reason = source.chars_left < 1 ? :premature : :failed
    context.err(self, source, @error_msgs[reason])
  end
  def to_s_inner(prec)
    # Strip the surrounding quotes that String#inspect adds.
    match.inspect[1..-2]
  end
 end
--- a/lib/parslet/atoms/repetition.rb
+++ b/lib/parslet/atoms/repetition.rb
@@ -0,0 +1,83 @@
 # Matches a parslet repeatedly. 
 #
 # Example: 
 #
 #   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
 #   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
 #
 class Parslet::Atoms::Repetition < Parslet::Atoms::Base  
  attr_reader :min, :max, :parslet
  # @param parslet atom to repeat
  # @param min minimum number of matches required
  # @param max maximum number of matches, nil for no maximum
  # @param tag first element of the result array (used for flattening)
  # @raise [ArgumentError] when max is 0
  def initialize(parslet, min, max, tag=:repetition)
    super()
    if max == 0
      raise ArgumentError, 
        "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})"
    end
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep  => "Expected at least #{min} of #{parslet.inspect}", 
      :unconsumed => "Extra input after last repetition"
    }
  end
  # Applies the parslet until it fails or max matches have been collected. 
  # The result array starts with the tag, followed by one value per match. 
  #
  def try(source, context, consume_all)
    matches = [@tag]
    count = 0
    origin = source.pos
    last_value = nil
    loop do
      matched, value = parslet.apply(source, context, false)
      last_value = value
      break unless matched
      count += 1
      matches << value
      # With an upper bound, stop as soon as it is reached. 
      return succ(matches) if max && count >= max
    end
    # The last attempt failed; last_value holds the reason for that failure.
    if count < min
      return context.err_at(
        self, source, @error_msgs[:minrep], origin, [last_value])
    end
    # consume_all means we are in the part of the parser that should consume
    # the input completely. Input left over at this point probably means
    # that the failed attempt is what the user wants to know about, so 
    # report it instead of throwing the reason away. 
    if consume_all && source.chars_left>0
      return context.err(
        self, source, @error_msgs[:unconsumed], [last_value])
    end
    succ(matches)
  end
  precedence REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"
    parslet.to_s(prec) + suffix
  end
 end
--- a/lib/parslet/atoms/scope.rb
+++ b/lib/parslet/atoms/scope.rb
@@ -0,0 +1,26 @@
 # Starts a new scope in the parsing process. Please also see the #captures
 # method. 
 #
 class Parslet::Atoms::Scope < Parslet::Atoms::Base
  attr_reader :block
  # @param block [#call] callable that returns the parslet to apply inside
  #   the new scope
  def initialize(block)
    super()
    @block = block
  end
  # Scopes are never cached.
  def cached?
    false
  end
  # Opens a new scope on the context, then applies the parslet returned by
  # the block inside of it and returns that parslet's result.
  def apply(source, context, consume_all)
    context.scope { return block.call.apply(source, context, consume_all) }
  end
  # Note: calls the block to print the parslet it returns.
  def to_s_inner(prec)
    inner = block.call.to_s(prec)
    "scope { #{inner} }"
  end
 end
--- a/lib/parslet/atoms/sequence.rb
+++ b/lib/parslet/atoms/sequence.rb
@@ -0,0 +1,45 @@
 # A sequence of parslets, matched from left to right. Denoted by '>>'
 #
 # Example: 
 #
 #   str('a') >> str('b')  # matches 'a', then 'b'
 #
 class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets
  # @param parslets atoms to match one after the other
  def initialize(*parslets)
    super()
    @parslets = parslets
    # The message embeds #inspect, so it must be built after @parslets is set.
    @error_msgs = {
      :failed  => "Failed to match sequence (#{self.inspect})"
    }
  end
  # Returns a new sequence with the parslet given appended, instead of
  # nesting this sequence inside another one.
  def >>(parslet)
    self.class.new(*@parslets, parslet)
  end
  # Applies the parslets from left to right and stops at the first failure.
  # The result array starts with :sequence, followed by one value per
  # parslet. Only the last parslet is asked to consume all input.
  def try(source, context, consume_all)
    last_index = parslets.size - 1
    collected = [:sequence]
    parslets.each_with_index do |atom, index|
      matched, value = atom.apply(
        source, context, consume_all && (index == last_index))
      return context.err(self, source, @error_msgs[:failed], [value]) unless matched
      collected << value
    end
    succ(collected)
  end
  precedence SEQUENCE
  def to_s_inner(prec)
    parslets.map { |atom| atom.to_s(prec) }.join(' ')
  end
 end
--- a/lib/parslet/atoms/str.rb
+++ b/lib/parslet/atoms/str.rb
@@ -0,0 +1,39 @@
 # Matches a string of characters. 
 #
 # Example: 
 # 
 #   str('foo') # matches 'foo'
 #
 class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str
  # @param str value to match literally; it is converted with #to_s, so
  #   anything that has a string representation can be given
  def initialize(str)
    super()
    @str = str.to_s
    # Build pattern and length from the converted string, not from the raw
    # argument: Regexp.escape rejects values such as integers or nil, and
    # their #size (if any) is unrelated to the length of the text to match.
    @pat = Regexp.new(Regexp.escape(@str))
    @len = @str.size
    @error_msgs = {
      :premature  => "Premature end of input", 
      :failed     => "Expected #{str.inspect}, but got "
    }
  end
  # Consumes the string when the source matches it at the current position.
  # Otherwise reports either premature end of input (fewer characters left
  # than the string is long) or a mismatch that names the input found.
  def try(source, context, consume_all)
    return succ(source.consume(@len)) if source.matches?(@pat)
    # Input ending early:
    return context.err(self, source, @error_msgs[:premature]) \
      if source.chars_left<@len
    # Expected something, but got something else instead. Remember the 
    # position first, since reading the offending input moves the source.
    error_pos = source.pos  
    return context.err_at(
      self, source, 
      [@error_msgs[:failed], source.consume(@len)], error_pos) 
  end
  def to_s_inner(prec)
    "'#{str}'"
  end
 end
--- a/lib/parslet/atoms/visitor.rb
+++ b/lib/parslet/atoms/visitor.rb
@@ -0,0 +1,89 @@
 # Augments all parslet atoms with an accept method that will call back 
 # to the visitor given.
 # 
 module Parslet::Atoms
  class Base
    # Default implementation for atoms that define no #accept of their own.
    #
    # @raise [NotImplementedError] always
    def accept(visitor)
      raise NotImplementedError, "No #accept method on #{self.class.name}."
    end
  end
  class Str
    # Calls back the visitor's #visit_str method with the string to match.
    # See parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_str(str)
    end
  end
  class Entity
    # Calls back the visitor's #visit_entity method with the entity's name
    # and its block. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_entity(name, block)
    end
  end
  class Named
    # Calls back the visitor's #visit_named method with the name and the
    # wrapped parslet. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_named(name, parslet)
    end
  end
  class Sequence
    # Calls back the visitor's #visit_sequence method with the list of
    # parslets. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_sequence(parslets)
    end
  end
  class Repetition
    # Calls back the visitor's #visit_repetition method with the tag, the
    # bounds and the repeated parslet. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_repetition(@tag, min, max, parslet)
    end
  end
  class Alternative
    # Calls back the visitor's #visit_alternative method with the list of
    # alternatives. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_alternative(alternatives)
    end
  end
  class Lookahead
    # Calls back the visitor's #visit_lookahead method with the positive
    # flag and the bound parslet. See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_lookahead(positive, bound_parslet)
    end
  end
  class Re
    # Calls back the visitor's #visit_re method with the pattern source.
    # See parslet/export for an example. 
    #
    def accept(visitor)
      visitor.visit_re(match)
    end
  end
 end
 class Parslet::Parser
  # Calls back the visitor's #visit_parser method with the parser's root.
  #
  def accept(visitor)
    visitor.visit_parser(root)
  end
 end
--- a/lib/parslet/cause.rb
+++ b/lib/parslet/cause.rb
@@ -0,0 +1,94 @@
 require 'stringio'
 module Parslet
  # Represents a cause why a parse did fail. A lot of these objects are
  # constructed - not all of the causes turn out to be failures for the whole
  # parse. 
  #
  class Cause
    # @param message [String, Array] message or list of message pieces
    # @param source source that was being parsed; must respond to
    #   #line_and_column for #to_s to work
    # @param pos position of the error
    # @param children [Array, nil] causes that led to this one
    def initialize(message, source, pos, children)
      @message, @source, @pos, @children = 
        message, source, pos, children
    end
    # @return [String, Array] A string or an array of message pieces that 
    #   provide failure information. Use #to_s to get a formatted string.
    attr_reader :message
    # @return [Parslet::Source] Source that was parsed when this error 
    #   happened. Mainly used for line number information.
    attr_reader :source
    # Location of the error. 
    #
    # @return [Fixnum] Position where the error happened. (character offset)
    attr_reader :pos 
    # When this cause is part of a tree of error causes: child nodes for this
    # node. Very often carries the reasons for this cause. 
    #
    # @return [Array<Parslet::Cause>] A list of reasons for this cause; empty
    #   when the cause was created without children. 
    def children
      @children ||= []
    end
    # Creates a new cause. Note the argument order, which differs from .new.
    # Use +pos+ to override the position of the +source+. The object 
    # returned can be turned into a string using #to_s, which appends 
    # 'at line LINE char CHAR' to the message. 
    #
    # @param source [Parslet::Source] source that was parsed when this error
    #   happened 
    # @param pos [Fixnum] position of error
    # @param str [String, Array<String>] message parts
    # @param children [Array<Parslet::Cause>] child nodes for this error tree
    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
    #
    def self.format(source, pos, str, children=[])
      self.new(str, source, pos, children)
    end
    # @return [String] the message followed by ' at line LINE char CHAR.'
    def to_s
      line, column = source.line_and_column(pos)
      # Allow message to be a list of objects. Join them here, since we now
      # really need it. Pieces that respond to #to_slice are shown as the
      # inspected string they carry. 
      text = Array(message).map { |o| 
        o.respond_to?(:to_slice) ? o.str.inspect : o.to_s }.join
      text + " at line #{line} char #{column}."
    end
    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages. 
    #
    # @param exception_klass exception class to raise; instantiated with
    #   the formatted message and this cause
    #
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      # This method shadows Kernel#raise inside the class, hence the 
      # explicit receiver. 
      Kernel.raise exception
    end
    # Returns an ascii tree representation of the causes of this node and its
    # children. 
    #
    # @return [String] one line per cause
    def ascii_tree
      StringIO.new.tap { |io| 
        recursive_ascii_tree(self, io, [true]) }.
        string
    end
  private
    # Prints node and, recursively, its children to stream. curved holds one
    # flag per ancestor level (plus the node itself) telling whether the 
    # node on that level is the last child of its parent. 
    def recursive_ascii_tree(node, stream, curved)
      append_prefix(stream, curved)
      stream.puts node.to_s
      # Decide by index which child is the last one; comparing the elements
      # would also flag earlier occurrences of an object that appears more 
      # than once. 
      last_index = node.children.size - 1
      node.children.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end
    # Prints the tree drawing that goes in front of a node. The root (a 
    # single flag) gets no prefix. 
    def append_prefix(stream, curved)
      return if curved.size < 2
      curved[1..-2].each do |c|
        stream.print c ? "   " : "|  "
      end
      stream.print curved.last ? "`- " : "|- "
    end
  end
 end
--- a/lib/parslet/context.rb
+++ b/lib/parslet/context.rb
@@ -0,0 +1,33 @@
 require 'blankslate'
 # Provides a context for tree transformations to run in. The context allows
 # accessing each of the bindings in the bindings hash as local method.
 #
 # Example: 
 #
 #   ctx = Context.new(:a => :b)
 #   ctx.instance_eval do 
 #     a # => :b
 #   end
 #
 # @api private
 class Parslet::Context < BlankSlate
  # NOTE(review): BlankSlate presumably strips most inherited methods and
  # `reveal` re-exposes the ones listed here - confirm against the
  # blankslate gem.
  reveal :methods
  reveal :respond_to?
  reveal :inspect
  reveal :to_s
  reveal :instance_variable_set
  # Defines a method called name on this one object only (on its singleton
  # class), with the block given as method body.
  def meta_def(name, &body)
    metaclass = class <<self; self; end
    metaclass.send(:define_method, name, &body)
  end
  # Makes every binding available in two ways: as a method named after the
  # key that returns the value, and as an instance variable of that name.
  #
  # @param bindings [Hash] maps binding names to their values
  def initialize(bindings)
    bindings.each do |key, value|
      meta_def(key.to_sym) { value }
      instance_variable_set("@#{key}", value)
    end
  end
 end
--- a/lib/parslet/convenience.rb
+++ b/lib/parslet/convenience.rb
@@ -0,0 +1,33 @@
 class Parslet::Atoms::Base
  # Packages the common idiom
  #    
  #    begin
  #      tree = parser.parse('something')
  #    rescue Parslet::ParseFailed => error
  #      puts error.cause.ascii_tree
  #    end
  #
  # into a convenient method. When the parse fails, the error tree is
  # printed and the exception is not passed on to the caller. 
  #
  # Usage:
  #   
  #   require 'parslet'
  #   require 'parslet/convenience'
  #   
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #   
  #   FooParser.new.parse_with_debug('bar')
  #
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts={})
    begin
      parse(str, opts)
    rescue Parslet::ParseFailed => failure
      puts failure.cause.ascii_tree
    end
  end
 end
--- a/lib/parslet/error_reporter.rb
+++ b/lib/parslet/error_reporter.rb
@@ -0,0 +1,7 @@
 # A namespace for all error reporters. The module itself is empty; the
 # reporter files are required right after it has been defined.
 #
 module Parslet::ErrorReporter
 end
 require 'parslet/error_reporter/tree'
 require 'parslet/error_reporter/deepest'
--- a/lib/parslet/error_reporter/deepest.rb
+++ b/lib/parslet/error_reporter/deepest.rb
@@ -0,0 +1,95 @@
 module Parslet
  module ErrorReporter
    # Instead of reporting the latest error that happens like {Tree} does,
    # this class reports the deepest error. Depth is defined here as how
    # advanced into the input an error happens. The errors close to the
    # greatest depth tend to be more relevant to the end user, since they
    # specify what could be done to make them go away. 
    #
    # More specifically, errors produced by this reporter won't be related to
    # the structure of the grammar at all. The positions of the errors will 
    # be advanced and convey at every grammar level what the deepest rule
    # was to fail. 
    #
    class Deepest
      # Returns the cause that is currently deepest. Mainly for specs. 
      #
      attr_reader :deepest_cause
      def initialize
        @deepest_cause = nil
      end
      # Builds an error cause at the current position of the source from the
      # message at this level and the errors from a level below (children),
      # then passes it through #deepest. 
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line 
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] The new cause or the deepest cause seen so far. 
      #
      def err(atom, source, message, children=nil)
        deepest(Cause.format(source, source.pos, message, children))
      end
      # Like #err, but with an explicit error position. 
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line 
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] The new cause or the deepest cause seen so far. 
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end
      # Looks up the most deeply nested leaf below the cause given. If no
      # reference has been saved yet, or that leaf is positioned at or
      # beyond the saved reference, the leaf becomes the new reference and
      # the cause is passed through to the caller. Otherwise the saved
      # reference is returned instead.
      #
      def deepest(cause)
        _rank, leaf = deepest_child(cause)
        reaches_deeper = !deepest_cause || leaf.pos >= deepest_cause.pos
        return deepest_cause unless reaches_deeper
        # This error reaches deeper into the input, save it as reference.
        @deepest_cause = leaf
        cause
      end
    private
      # Returns rank and node of the most deeply nested node in the error
      # tree given, where rank is the nesting level. Among nodes of equal 
      # rank, the first one found wins. 
      #
      def deepest_child(cause, rank=0)
        best = [rank, cause]
        (cause.children || []).each do |child|
          candidate = deepest_child(child, rank+1)
          best = candidate if candidate.first > best.first
        end
        best
      end
    end
  end
 end
--- a/lib/parslet/error_reporter/tree.rb
+++ b/lib/parslet/error_reporter/tree.rb
@@ -0,0 +1,57 @@
 module Parslet
  module ErrorReporter
    # An error reporter has two central methods, one for reporting errors at
    # the current parse position (#err) and one for reporting errors at a
    # given parse position (#err_at). The reporter can return an object (a
    # 'cause') that will be returned to the caller along with the information
    # that the parse failed. 
    # 
    # When reporting errors on the outer levels of your parser, these methods
    # get passed a list of error objects ('causes') from the inner levels. In
    # this default implementation, the inner levels are considered error
    # subtrees and are appended to the generated tree node at each level,
    # thereby constructing an error tree. 
    #
    # This error tree will report in parallel with the grammar structure that
    # failed. A one-to-one correspondence exists between each error in the 
    # tree and the parslet atom that produced that error. 
    #
    # The implementor is really free to use these return values as he sees
    # fit. One example would be to return an error state object from these
    # methods that is then updated as errors cascade up the parse derivation
    # tree. 
    #
    class Tree
      # Reports an error at the current position of the source.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] source of this parse; supplies the position
      # @param message [String, Array] error message at this level
      # @param children [Array] errors from a deeper level (or nil)
      # @return [Cause] whatever Cause.format builds from these arguments
      #
      def err(atom, source, message, children=nil)
        Cause.format(source, source.pos, message, children)
      end
      # Reports an error at an explicitly given position.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed (not used here)
      # @param source [Source] source of this parse
      # @param message [String, Array] error message at this level
      # @param pos [Fixnum] position the error is reported at
      # @param children [Array] errors from a deeper level (or nil)
      # @return [Cause] whatever Cause.format builds from these arguments
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
 end
--- a/lib/parslet/export.rb
+++ b/lib/parslet/export.rb
@@ -0,0 +1,162 @@
 # Allows exporting parslet grammars to other lingos. 
 require 'set'
 require 'parslet/atoms/visitor'
 class Parslet::Parser
  module Visitors
    # Renders a parslet tree as the body of a Citrus rule. Every visit_*
    # method returns a freshly built String; strings handed in by the visited
    # atoms (or returned by nested visits) are never modified in place.
    #
    class Citrus
      attr_reader :context, :output
      # @param context [#deferred, #mangle_name] receiver for rules that
      #   must be printed later and for name mangling
      def initialize(context)
        @context = context
      end
      # A string atom becomes a double-quoted literal, escaped like #inspect.
      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end
      # A regular expression atom is emitted verbatim.
      def visit_re(match)
        match.to_s
      end
      # A rule reference: schedule the rule body for printing and emit its
      # (mangled) name.
      def visit_entity(name, block)
        context.deferred(name, block)
        "(#{context.mangle_name(name)})"
      end
      # Naming has no representation in the output, only the inner parslet.
      def visit_named(name, parslet)
        parslet.accept(self)
      end
      def visit_sequence(parslets)
        "(#{parslets.map { |el| el.accept(self) }.join(' ')})"
      end
      # Built by interpolation rather than <<: the string returned by the
      # nested visit may be the atom's own string (see #visit_re) and must not
      # be appended to.
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}*#{max}"
      end
      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' | ')})"
      end
      # '&' marks a positive lookahead, '!' a negative one.
      def visit_lookahead(positive, bound_parslet)
        "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
      end
    end
    # Same as Citrus, except for the notation of repetitions ('min..max') and
    # alternatives ('/').
    #
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}..#{max}"
      end
      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' / ')})"
      end
    end
  end
  # A helper class that formats Citrus and Treetop grammars as a string. 
  #
  class PrettyPrinter
    attr_reader :visitor
    # @param visitor_klass [Class] visitor class (Visitors::Citrus or
    #   Visitors::Treetop); it is instantiated with this printer as context
    def initialize(visitor_klass)
      # Queue of [name, block] pairs for rules that still need printing. It
      # exists from the start so that grammars which never defer a rule can
      # be printed as well.
      @todo = []
      @visitor = visitor_klass.new(self)
    end
    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    # @param name [String] name of the grammar
    # @param parslet [#accept] parslet printed as the 'root' rule
    # @return [String] the complete grammar
    #
    def pretty_print(name, parslet)
      output = "grammar #{name}\n"
      output << rule('root', parslet)
      seen = Set.new
      # @todo keeps being filled by the visitor (see #deferred) while rules
      # are printed; keep going until it is empty.
      until @todo.empty?
        rule_name, block = @todo.shift
        # Track what rules we've already printed. This breaks loops.
        next unless seen.add?(rule_name)
        output << rule(rule_name, block.call)
      end
      output << "end\n"
    end
    # Formats a rule in either dialect.
    #
    def rule(name, parslet)
      "  rule #{mangle_name name}\n" \
      "    #{parslet.accept(visitor)}\n" \
      "  end\n"
    end
    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      @todo ||= []
      @todo << [name, content]
    end
    # Mangles names so that Citrus and Treetop can live with it: a trailing
    # '?' is replaced by '_p'. If there is a collision, we will not detect it
    # for now.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end
  # Exports the current parser instance as a string in the Citrus dialect. 
  #
  # Example: 
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #   
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    # The grammar is named after the parser class.
    printer = PrettyPrinter.new(Visitors::Citrus)
    printer.pretty_print(self.class.name, root)
  end
  # Exports the current parser instance as a string in the Treetop dialect. 
  #
  # Example: 
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #   
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    # The grammar is named after the parser class.
    printer = PrettyPrinter.new(Visitors::Treetop)
    printer.pretty_print(self.class.name, root)
  end
 end
--- a/lib/parslet/expression.rb
+++ b/lib/parslet/expression.rb
@@ -0,0 +1,51 @@
 # Allows specifying rules as strings using the exact same grammar that treetop
 # does, minus the actions. This is on one hand a good example of a fully
 # fledged parser and on the other hand might even turn out really useful. 
 #
 # This can be viewed as an extension to parslet and might even be hosted in
 # its own gem one fine day. 
 # 
 class Parslet::Expression
  include Parslet
  autoload :Treetop, 'parslet/expression/treetop'
  # Builds a parslet from an expression written in a foreign grammar
  # language; the string is parsed and transformed right away.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  # @param str [String] the expression
  # @param opts [Hash] :type is stored, defaulting to :treetop
  # @param context [Object] accepted but not used by this method
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str
    tree = parse(str)
    @parslet = transform(tree)
  end
  # Applies the Treetop transform to a parse tree. If that fails, the tree is
  # written as a warning and the error is raised again.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  rescue
    warn "Could not transform: #{tree.inspect}"
    raise
  end
  # Runs the Treetop expression parser over the string; returns its tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end
  # The parslet that was built from the expression in #initialize.
  #
  def to_parslet
    @parslet
  end
 end
--- a/lib/parslet/expression/treetop.rb
+++ b/lib/parslet/expression/treetop.rb
@@ -0,0 +1,92 @@
 class Parslet::Expression::Treetop
  # Grammar for treetop-style expressions (without actions). The tags
  # attached with .as(...) are the hash keys the transform rules match on.
  #
  class Parser < Parslet::Parser
    root(:expression)
    rule(:expression) { alternatives }
    # alternative 'a' / 'b'
    rule(:alternatives) {
      (simple >> (spaced('/') >> simple).repeat).as(:alt)
    }
    # sequence by simple concatenation 'a' 'b'
    rule(:simple) { occurrence.repeat(1).as(:seq) }
    # occurrence modifiers: 'a'*, 'a'+, 'a'{1, 2}, 'a'?
    rule(:occurrence) {
      atom.as(:repetition) >> spaced('*').as(:sign) |
      atom.as(:repetition) >> spaced('+').as(:sign) |
      atom.as(:repetition) >> repetition_spec |
      atom.as(:maybe) >> spaced('?') |
      atom
    }
    # a parenthesized expression, the dot, a string or a character class
    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
      dot |
      string |
      char_class
    }
    # a character class; a backslash escapes the character following it
    rule(:char_class) {
      (str('[') >>
        (str('\\') >> any |
        str(']').absent? >> any).repeat(1) >>
      str(']')).as(:match) >> space?
    }
    # anything at all
    rule(:dot) { spaced('.').as(:any) }
    # recognizing strings; a backslash escapes the character following it
    rule(:string) {
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absent? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    }
    # repetition specification like {1, 2}
    rule(:repetition_spec) {
      spaced('{') >>
        integer.maybe.as(:min) >> spaced(',') >>
        integer.maybe.as(:max) >> spaced('}')
    }
    rule(:integer) {
      match['0-9'].repeat(1)
    }
    # whitespace handling
    # The pattern is single-quoted on purpose: inside a double-quoted Ruby
    # string, \s is the escape for a literal space, which would restrict this
    # rule to blanks and reject tabs and newlines.
    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }
    # Matches the given literal followed by optional whitespace.
    def spaced(str)
      str(str) >> space?
    end
  end
  # Turns the tree produced by Parser into parslet atoms. Each rule matches one
  # of the hash shapes that the grammar tags with .as(...).
  #
  class Transform < Parslet::Transform
    # 'x*' and 'x+': repetition without upper bound; '+' demands at least one
    # occurrence, anything else (that is '*') allows zero.
    rule(:repetition => simple(:rep), :sign => simple(:sign)) { 
      min = sign=='+' ? 1 : 0
      Parslet::Atoms::Repetition.new(rep, min, nil) }
    # 'x{min, max}': a missing min counts as 0, a missing max stays nil.
    rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) { 
      Parslet::Atoms::Repetition.new(rep, 
        Integer(min || 0), 
        max && Integer(max) || nil) }
    rule(:alt => subtree(:alt))       { Parslet::Atoms::Alternative.new(*alt) }
    rule(:seq => sequence(:s))        { Parslet::Atoms::Sequence.new(*s) }
    # Parenthesized expressions are replaced by their content.
    rule(:unwrap => simple(:u))       { u }
    # This block takes the bindings as a hash argument (d) and reads :m from it.
    rule(:maybe => simple(:m))        { |d| d[:m].maybe }
    rule(:string => simple(:s))       { Parslet::Atoms::Str.new(s) }
    rule(:match => simple(:m))        { Parslet::Atoms::Re.new(m) }
    # The dot becomes a regular expression atom for '.'; the bound text is unused.
    rule(:any => simple(:a))          { Parslet::Atoms::Re.new('.') }
  end
 end
--- a/lib/parslet/graphviz.rb
+++ b/lib/parslet/graphviz.rb
@@ -0,0 +1,97 @@
 # Paints a graphviz graph of your parser.
 # Load the graphviz bindings up front. If they are missing, print an
 # installation hint to standard output and re-raise the LoadError (a bare
 # `fail` inside a rescue clause raises the rescued exception again).
 begin
  require 'ruby-graphviz'
 rescue LoadError
  puts "Please install the 'ruby-graphviz' gem first."
  fail
 end
 require 'set'
 require 'parslet/atoms/visitor'
 module Parslet
  # Visitor that walks a grammar and records it in a graph: one node per rule
  # (entity) and one edge from each rule to every rule reached from it.
  #
  class GraphvizVisitor
    # The graph node that edges drawn next will start from.
    attr_reader :parent
    # @param g [#add_nodes, #add_edges] graph that receives nodes and edges
    def initialize(g)
      @graph = g
      @known_links = Set.new
      @visited = Set.new
    end
    # Entry point: the parser itself becomes the node named 'parser'.
    def visit_parser(root)
      recurse(root, node('parser'))
    end
    # A rule: link it to the current parent and descend into its body, but
    # only the first time the rule is encountered.
    def visit_entity(name, block)
      entity_node = node(name)
      downwards(entity_node)
      # Set#add? answers nil when the name is already a member.
      return unless @visited.add?(name)
      recurse(block.call, entity_node)
    end
    def visit_named(name, atom)
      recurse(atom, parent)
    end
    def visit_repetition(tag, min, max, atom)
      recurse(atom, parent)
    end
    # All branches hang off the same parent; #recurse overwrites @parent, so
    # it is captured before iterating.
    def visit_alternative(alternatives)
      origin = parent
      alternatives.each { |atom| recurse(atom, origin) }
    end
    # Same treatment as for alternatives: every element shares one parent.
    def visit_sequence(sequence)
      origin = parent
      sequence.each { |atom| recurse(atom, origin) }
    end
    def visit_lookahead(positive, atom)
      recurse(atom, parent)
    end
    # Regular expression atoms are currently not drawn. Disabled code:
    # downwards node(regexp.object_id, label: escape("re(#{regexp.inspect})"))
    def visit_re(regexp)
    end
    # String atoms are currently not drawn. Disabled code:
    # downwards node(str.object_id, label: escape("#{str.inspect}"))
    def visit_str(str)
    end
    # Replaces double quotes by single quotes.
    def escape(str)
      str.gsub('"', "'")
    end
    # Adds a node called name.to_s to the graph; returns what the graph returns.
    def node(name, opts={})
      @graph.add_nodes(name.to_s, opts)
    end
    # Draws an edge from the current parent to child, once per pair.
    def downwards(child)
      return unless @parent
      link = [@parent, child]
      return if @known_links.include?(link)
      @graph.add_edges(@parent, child)
      @known_links << link
    end
    # Makes current the parent, then visits node.
    def recurse(node, current)
      @parent = current
      node.accept(self)
    end
  end
  # Adds #graph to whatever extends this module (see the Parser reopening).
  module Graphable
    # Draws an instance of the receiver (obtained through #new) into a
    # directed graph and writes it out.
    #
    # @param opts [Hash] passed on to the graph's #output unchanged
    def graph(opts)
      canvas = GraphViz.new(:G, type: :digraph)
      painter = GraphvizVisitor.new(canvas)
      new.accept(painter)
      canvas.output(opts)
    end
  end
  # Extending (rather than including) Graphable makes #graph callable on the
  # class itself.
  class Parser # reopen for introducing the .graph method
    extend Graphable
  end
 end
--- a/lib/parslet/parser.rb
+++ b/lib/parslet/parser.rb
@@ -0,0 +1,67 @@
 # The base class for all your parsers. Use as follows: 
 #
 #   require 'parslet'
 #        
 #   class MyParser < Parslet::Parser
 #     rule(:a) { str('a').repeat }
 #     root(:a)        
 #   end
 #        
 #   pp MyParser.new.parse('aaaa')   # => 'aaaa'
 #   pp MyParser.new.parse('bbbb')   # => Parslet::Atoms::ParseFailed: 
 #                                   #    Don't know what to do with bbbb at line 1 char 1.
 #
 # Parslet::Parser is also a grammar atom. This means that you can mix full 
 # fledged parsers freely with small parts of a different parser. 
 #
 # Example: 
 #   class ParserA < Parslet::Parser
 #     root :aaa
 #     rule(:aaa) { str('a').repeat(3,3) }
 #   end
 #   class ParserB < Parslet::Parser
 #     root :expression
 #     rule(:expression) { str('b') >> ParserA.new >> str('b') }
 #   end
 #
 # In the above example, ParserB would parse something like 'baaab'. 
 #
 class Parslet::Parser < Parslet::Atoms::Base
  include Parslet
  # Declares which rule parsing starts at. If you have a rule for 'file' that
  # describes what should be in a file, this would be your root declaration:
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # The declaration defines an instance method #root that calls the method
  # of the given name on the parser and returns its result.
  #
  # @param name [Symbol] name of the rule to start with
  #
  def self.root(name)
    define_method(:root) { send(name) }
  end
  # A parser is an atom itself: trying it means trying its root.
  def try(source, context, consume_all)
    root.try(source, context, consume_all)
  end
  # String representation is that of the root.
  def to_s_inner(prec)
    root.to_s(prec)
  end
 end
--- a/lib/parslet/pattern.rb
+++ b/lib/parslet/pattern.rb
@@ -0,0 +1,114 @@
 # Matches trees against expressions. Trees are formed by arrays and hashes
 # for expressing membership and sequence. The leafs of the tree are other
 # classes. 
 #
 # A tree issued by the parslet library might look like this: 
 #
 #   { 
 #     :function_call => {
 #       :name => 'foobar', 
 #       :args => [1, 2, 3]
 #     }
 #   }
 #
 # A pattern that would match against this tree would be: 
 #
 #   { :function_call => { :name => simple(:name), :args => sequence(:args) }}
 #
 # Note that Parslet::Pattern only matches at a given subtree; it won't try
 # to match recursively. To do that, please use Parslet::Transform.
 #
 class Parslet::Pattern
  # @param pattern [Object] expression that subtrees are matched against;
  #   hashes and arrays are matched structurally, bind placeholders bind
  def initialize(pattern)
    @pattern = pattern
  end
  # Decides if the given subtree matches this pattern. Returns the bindings
  # made on a successful match or nil if the match fails. If you specify
  # bindings to be a hash, the mappings in it will be treated like bindings
  # made during an attempted match. The hash given is not modified.
  #
  #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
  #
  # @param subtree [String, Hash, Array] poro subtree returned by a parse
  # @param bindings [Hash] variable bindings to be verified
  # @return [Hash, nil] On success: variable bindings that allow a match. On
  #   failure: nil
  #
  def match(subtree, bindings=nil)
    bindings = bindings && bindings.dup || Hash.new
    return bindings if element_match(subtree, @pattern, bindings)
  end
  # Returns true if the tree element given by +tree+ matches the expression
  # given by +exp+. This match must respect bindings already made in
  # +bindings+. Note that bindings is carried along and modified.
  #
  # @api private
  #
  def element_match(tree, exp, bindings)
    # Structural matching applies only when both sides are exactly a Hash or
    # exactly an Array.
    case [tree, exp].map { |e| e.class }
      when [Hash,Hash]
        return element_match_hash(tree, exp, bindings)
      when [Array,Array]
        return element_match_ary_single(tree, exp, bindings)
    else
      # If elements match exactly, then that is good enough in all cases
      return true if exp === tree
      # If exp is a bind variable: Check if the binding matches
      if exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
        return element_match_binding(tree, exp, bindings)
      end
      # Otherwise: No match (we don't know anything about the element
      # combination)
      return false
    end
  end
  # Binds the variable of +exp+ to +tree+, or - when the variable is already
  # bound - verifies that it is bound to an equal value.
  #
  # @api private
  #
  def element_match_binding(tree, exp, bindings)
    var_name = exp.variable_name
    # TODO test for the hidden :_ feature.
    # Look at the presence of the key, not at the truthiness of the value: a
    # variable that has been bound to nil or false is bound nonetheless and
    # must not be rebound to a different value.
    if var_name && bindings.has_key?(var_name)
      return bindings[var_name] == tree
    end
    # New binding:
    bindings.store var_name, tree
    return true
  end
  # Arrays match when they have the same size and match element by element.
  #
  # @api private
  #
  def element_match_ary_single(sequence, exp, bindings)
    return false if sequence.size != exp.size
    return sequence.zip(exp).all? { |elt, subexp|
      element_match(elt, subexp, bindings) }
  end
  # Hashes match when they have the same size and every value of the pattern
  # matches the value stored under the same key in the tree.
  #
  # @api private
  #
  def element_match_hash(tree, exp, bindings)
    # Early failure when one hash is bigger than the other
    return false unless exp.size == tree.size
    # We iterate over expected pattern, since we demand that the keys that
    # are there should be in tree as well.
    exp.each do |expected_key, expected_value|
      return false unless tree.has_key? expected_key
      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)
    end
    return true
  end
 end
--- a/lib/parslet/pattern/binding.rb
+++ b/lib/parslet/pattern/binding.rb
@@ -0,0 +1,49 @@
 # Used internally for representing a bind placeholder in a Parslet::Transform
 # pattern. This is the superclass for all bindings. 
 #
 # It defines the most permissive kind of bind, the one that matches any subtree
 # whatever it looks like. 
 #
 class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # The struct member doubles as the name of the variable that gets bound.
  def variable_name
    symbol
  end
  # Reads like the pattern it came from: bind type, then the symbol in parens.
  def inspect
    format('%s(%s)', bind_type_name, symbol.inspect)
  end
  # The most permissive bind accepts whatever it is given.
  def can_bind?(subtree)
    true
  end
 private
  # Derives the bind type from the class name: the word characters right in
  # front of 'Bind', in lower case.
  def bind_type_name
    md = self.class.name.match(/(\w+)Bind/)
    # This path should never be used, but since this is for inspection only,
    # let's not raise.
    return 'unknown_bind' unless md
    md.captures.first.downcase
  end
 end
 # Binds a symbol to a simple subtree, one that is not either a sequence of
 # elements or a collection of attributes. 
 #
 class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
  # Anything binds, except objects whose class is exactly Hash or Array.
  def can_bind?(subtree)
    ![Hash, Array].include?(subtree.class)
  end
 end
 # Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
 #
 class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
  # Only arrays bind, and only if none of their elements is itself exactly a
  # Hash or an Array.
  def can_bind?(subtree)
    return false unless subtree.kind_of?(Array)
    subtree.none? { |el| [Hash, Array].include?(el.class) }
  end
 end
--- a/lib/parslet/rig/rspec.rb
+++ b/lib/parslet/rig/rspec.rb
@@ -0,0 +1,59 @@
 # RSpec matcher for parsers:
 #
 #   parser.should parse('input')
 #   parser.should parse('input').as('expected output')
 #   parser.should parse('input').as { |output| output == 'expected' }
 #   parser.should parse('input', trace: true)
 #
 RSpec::Matchers.define(:parse) do |input, opts|
  # State shared by the blocks below: the expected output or the checking
  # block given through .as, the parse result and the optional error trace.
  as = block = nil
  result = trace = nil
  unless self.respond_to? :failure_message # if RSpec 2.x
    class << self
      alias_method :failure_message, :failure_message_for_should
      alias_method :failure_message_when_negated, :failure_message_for_should_not
    end
  end
  match do |parser|
    begin
      result = parser.parse(input)
      # A block given to .as decides alone; otherwise the result must equal
      # the expected output - or no expectation was given at all.
      block ?
        block.call(result) :
        (as == result || as.nil?)
    rescue Parslet::ParseFailed => ex
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end
  failure_message do |is|
    if block
      "expected output of parsing #{input.inspect}" <<
      " with #{is.inspect} to meet block conditions, but it didn't"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
          "#{is.inspect} to be able to parse #{input.inspect}") <<
        (trace ?
          "\n"+trace :
          '')
    end
  end
  failure_message_when_negated do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} not to equal #{as.inspect}" :
          "#{is.inspect} to not parse #{input.inspect}, but it did")
    end
  end
  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  #
  # The block parameter must not be called `block`: a parameter of that name
  # shadows the outer variable, so the block handed to .as would never reach
  # the match block. The expected output is optional so that the block form
  # can be used without an argument.
  chain :as do |expected_output=nil, &my_block|
    as = expected_output
    block = my_block
  end
 end
--- a/lib/parslet/scope.rb
+++ b/lib/parslet/scope.rb
@@ -0,0 +1,42 @@
 # A stack of bindings. Lookups start at the innermost binding and continue
 # outwards; assignments always go to the innermost binding.
 #
 class Parslet::Scope
  # Raised when the accessed slot has never been assigned a value.
  #
  class NotFound < StandardError
  end
  # One level of the scope: a hash of slots plus a link to the enclosing level.
  #
  class Binding
    attr_reader :parent
    def initialize(parent=nil)
      @parent = parent
      @hash = Hash.new
    end
    # Looks up k here first, then in the enclosing bindings.
    #
    # The decision is based on the presence of the key: a slot that holds nil
    # or false has been assigned and is returned as is, instead of being
    # mistaken for a missing slot.
    #
    # @raise [NotFound] if no binding in the chain has the key
    def [](k)
      return @hash[k] if @hash.has_key?(k)
      raise NotFound unless parent
      parent[k]
    end
    def []=(k,v)
      @hash.store(k,v)
    end
  end
  # Reads slot k, searching from the innermost binding outwards.
  #
  # @raise [NotFound] if the slot has never been assigned
  def [](k)
    @current[k]
  end
  # Assigns slot k in the innermost binding.
  def []=(k,v)
    @current[k] = v
  end
  def initialize
    @current = Binding.new
  end
  # Opens a new innermost binding on top of the current one.
  def push
    @current = Binding.new(@current)
  end
  # Drops the innermost binding and returns to the enclosing one.
  def pop
    @current = @current.parent
  end
 end
--- a/lib/parslet/slice.rb
+++ b/lib/parslet/slice.rb
@@ -0,0 +1,101 @@
 # A slice is a small part from the parse input. A slice mainly behaves like
 # any other string, except that it remembers where it came from (offset in
 # original input).
 #
 # == Extracting line and column
 #
 # Using the #line_and_column method, you can extract the line and column in
 # the original input where this slice starts.
 #
 # Example:
 #   slice.line_and_column # => [1, 13]
 #   slice.offset          # => 12
 #
 # == Likeness to strings
 #
 # Parslet::Slice behaves in many ways like a Ruby String. This likeness
 # however is not complete - many of the myriad of operations String supports
 # are not yet in Slice. You can always extract the internal string instance by
 # calling #to_s.
 #
 # These omissions are somewhat intentional. Rather than maintaining a full
 # delegation, we opt for a partial emulation that gets the job done.
 #
 class Parslet::Slice
  attr_reader :str, :offset, :line_cache
  # Construct a slice using a string, an offset and an optional line cache.
  # The line cache should be able to answer to the #line_and_column message.
  #
  def initialize(string, offset, line_cache=nil)
    @str = string
    @offset = offset
    @line_cache = line_cache
  end
  # Compares the string of this slice with other (a slice or a string).
  #
  def ==(other)
    str == other
  end
  # Matches the string of this slice against a regular expression.
  #
  def match(regexp)
    str.match(regexp)
  end
  # Size of the string of this slice.
  #
  def size
    str.size
  end
  # Concatenate two slices; it is assumed that the second slice begins
  # where the first one ends. The result keeps the offset and the line cache
  # of this slice.
  #
  def +(other)
    self.class.new(str + other.to_s, offset, line_cache)
  end
  # Returns a <line, column> tuple referring to the original input.
  #
  # @raise [ArgumentError] if the slice was constructed without a line cache
  #
  def line_and_column
    unless line_cache
      raise ArgumentError, "No line cache was given, cannot infer line and column."
    end
    line_cache.line_and_column(offset)
  end
  # Conversion operators -----------------------------------------------------
  def to_str
    str
  end
  alias to_s to_str
  def to_slice
    self
  end
  def to_sym
    str.to_sym
  end
  # Strict conversion: goes through Integer().
  def to_int
    Integer(str)
  end
  def to_i
    str.to_i
  end
  def to_f
    str.to_f
  end
  # Inspection & Debugging ---------------------------------------------------
  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    "#{str.inspect}@#{offset}"
  end
 end
--- a/lib/parslet/source.rb
+++ b/lib/parslet/source.rb
@@ -0,0 +1,87 @@
 require 'stringio'
 require 'strscan'
 require 'parslet/source/line_cache'
 module Parslet
  # Wraps the input string for parslet. 
  #
  class Source
    # @param str [#to_str] the input to parse
    # @raise [ArgumentError] if str does not respond to #to_str
    def initialize(str)
      unless str.respond_to?(:to_str)
        raise ArgumentError, "Must construct Source with a string like object."
      end
      @str = StringScanner.new(str)
      # Patterns are built on first use and kept, one per requested length n.
      @re_cache = Hash.new { |cache, count| cache[count] = /(.|$){#{count}}/m }
      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, str)
    end
    # Checks if the given pattern matches at the current input position.
    #
    # @param pattern [Regexp] pattern to check for
    # @return the result of StringScanner#match?; truthy on a match
    #
    def matches?(pattern)
      @str.match?(pattern)
    end
    alias match matches?
    # Consumes n characters from the input, returning them as a slice that
    # remembers the position it was taken from.
    #
    def consume(n)
      start = @str.pos
      Parslet::Slice.new(@str.scan(@re_cache[n]), start, @line_cache)
    end
    # Returns what is left of the input, as reported by the scanner.
    #
    def chars_left
      @str.rest_size
    end
    # Returns how many chars there are between current position and the
    # string given. If the string given doesn't occur in the source, then
    # the remaining chars (#chars_left) are returned.
    #
    # @return [Fixnum] count of chars until str or #chars_left
    #
    def chars_until str
      found = @str.check_until(Regexp.new(Regexp.escape(str)))
      found ? found.size - str.size : chars_left
    end
    # Position of the parse as an offset into the original string.
    # @note: Encodings...
    def pos
      @str.pos
    end
    def pos=(n)
      begin
        @str.pos = n
      rescue RangeError
        # Positions the scanner refuses are silently ignored.
      end
    end
    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position
    # given by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || pos)
    end
  end
 end
--- a/lib/parslet/source/line_cache.rb
+++ b/lib/parslet/source/line_cache.rb
@@ -0,0 +1,96 @@
 class Parslet::Source
  # A cache for line start positions. 
  #
  class LineCache
    def initialize
      # Sorted list of the positions that directly follow a "\n". The first
      # line always starts at 0; positions beyond the last entry are on some
      # later line, which may not have been scanned yet.
      @line_ends = []
      @line_ends.extend RangeSearch
    end
    # Returns a <line, column> tuple for the given input position. Both
    # numbers are 1-based.
    #
    def line_and_column(pos)
      idx = @line_ends.lbound(pos)
      if idx.nil?
        # Beyond the last line end that we know about. Pretend for now that
        # we're just on the last line.
        line = @line_ends.size + 1
        line_start = @line_ends.last || 0
      else
        # idx points to the entry that ends the current line; the entry in
        # front of it (if any) is where the line starts.
        line = idx + 1
        line_start = idx > 0 ? @line_ends[idx - 1] : 0
      end
      [line, pos - line_start + 1]
    end
    # Records the line endings found in buf, which is taken to begin at
    # position start_pos of the input.
    #
    def scan_for_line_endings(start_pos, buf)
      return unless buf
      scanner = StringScanner.new(buf)
      return unless scanner.exist?(/\n/)
      # The part of buf in front of the last recorded line end has been
      # scanned before; skip it instead of recording its endings again.
      known = @last_line_end
      scanner.pos = known - start_pos if known && start_pos < known
      # Store the position after every remaining "\n".
      while scanner.skip_until(/\n/)
        @last_line_end = start_pos + scanner.pos
        @line_ends << @last_line_end
      end
    end
  end
  # Mixin for arrays that implicitly give a number of ranges, where one range
  # begins where the other one ends.
  # 
  #   Example: 
  #
  #     [10, 20, 30]
  #     # would describe [0, 10], (10, 20], (20, 30]
  #
  module RangeSearch
    # Index halfway between left and right, rounded down.
    def find_mid(left, right)
      # NOTE: Jonathan Hinkle reported that when mathn is required, just
      # dividing and relying on the integer truncation is not enough - hence
      # the explicit floor.
      left + ((right - left) / 2).floor
    end
    # Binary search for the first number that is > than bound. Returns the
    # index of that number, or nil if there is no such number.
    #
    def lbound(bound)
      return nil if empty?
      return nil unless last > bound
      lo = 0
      hi = size - 1
      # Invariant: self[hi] > bound. The window shrinks until it is one wide.
      while lo < hi
        mid = find_mid(lo, hi)
        if self[mid] > bound
          hi = mid
        else
          # self[mid] <= bound, the answer lies to the right of mid
          lo = mid + 1
        end
      end
      hi
    end
  end
 end
--- a/lib/parslet/transform.rb
+++ b/lib/parslet/transform.rb
@@ -0,0 +1,236 @@
 require 'parslet/pattern'
 # Transforms an expression tree into something else. The transformation
 # performs a depth-first, post-order traversal of the expression tree. During
 # that traversal, each time a rule matches a node, the node is replaced by the
 # result of the block associated to the rule. Otherwise the node is accepted
 # as is into the result tree.
 #
 # This is almost what you would generally do with a tree visitor, except that
 # you can match several levels of the tree at once. 
 #
 # As a consequence of this, the resulting tree will contain pieces of the
 # original tree and new pieces. Most likely, you will want to transform the
 # original tree wholly, so this isn't a problem.
 #
 # You will not be able to create a loop, given that each node will be replaced
 # only once and then left alone. This means that the results of a replacement
 # will not be acted upon. 
 #
 # Example: 
 #
 #   class Example < Parslet::Transform
 #     rule(:string => simple(:x)) {  # (1)
 #       StringLiteral.new(x)
 #     }
 #   end
 #
 # A tree transform (Parslet::Transform) is defined by a set of rules. Each
 # rule can be defined by calling #rule with the pattern as argument. The block
 # given will be called every time the rule matches somewhere in the tree given
 # to #apply. It is passed a Hash containing all the variable bindings of this
 # pattern match. 
 #   
 # In the above example, (1) illustrates a simple matching rule. 
 #
 # Let's say you want to parse matching parentheses and distill a maximum nest
 # depth. You would probably write a parser like the one in example/parens.rb;
 # here's the relevant part: 
 #
 #   rule(:balanced) {
 #     str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
 #   }
 #
 # If you now apply this to a string like '(())', you get an intermediate parse
 # tree that looks like this: 
 #
 #   {
 #     l: '(', 
 #     m: {
 #       l: '(', 
 #       m: nil, 
 #       r: ')' 
 #     }, 
 #     r: ')' 
 #   }
 #
 # This parse tree is good for debugging, but what we would really like to have
 # is just the nesting depth. This transformation rule will produce that: 
 #
 #   rule(:l => '(', :m => simple(:x), :r => ')') { 
 #     # innermost :m will contain nil
 #     x.nil? ? 1 : x+1
 #   }
 #
 # = Usage patterns
 #
 # There are four ways of using this class. The first one is very much
 # recommended, followed by the second one for generality. The other ones are
 # omitted here. 
 #
 # Recommended usage is as follows: 
 #
 #   class MyTransformator < Parslet::Transform
 #     rule(...) { ... }
 #     rule(...) { ... }
 #     # ...
 #   end
 #   MyTransformator.new.apply(tree)
 #
 # Alternatively, you can use the Transform class as follows: 
 #
 #   transform = Parslet::Transform.new do
 #     rule(...) { ... }
 #   end
 #   transform.apply(tree)
 #
 # = Execution context
 #
 # The execution context of action blocks differs depending on the arity of 
 # said blocks. This can be confusing. It is however somewhat intentional. You 
 # should not create fat Transform descendants containing a lot of helper methods, 
 # instead keep your AST class construction in global scope or make it available
 # through a factory. The following piece of code illustrates usage of global
 # scope: 
 #
 #   transform = Parslet::Transform.new do
 #     rule(...) { AstNode.new(a_variable) }
 #     rule(...) { Ast.node(a_variable) } # modules are nice
 #   end
 #   transform.apply(tree)
 #
 # And here's how you would use a class builder (a factory):
 #
 #   transform = Parslet::Transform.new do
 #     rule(...) { builder.add_node(a_variable) }
 #     rule(...) { |d| d[:builder].add_node(d[:a_variable]) }
 #   end
 #   transform.apply(tree, :builder => Builder.new)
 #
 # As you can see, Transform allows you to inject local context for your rule
 # action blocks to use. 
 #
 class Parslet::Transform
  # FIXME: Including all of Parslet may be more than is needed; possibly
  # only the constructor block's context requires it.
  include Parslet
  class << self
    # FIXME: Should this be restricted to subclasses?
    include Parslet
    # Registers a class-level rule. The expression is wrapped in a
    # Parslet::Pattern and stored together with the action block.
    #
    def rule(expression, &block)
      registry = (@__transform_rules ||= [])
      registry << [Parslet::Pattern.new(expression), block]
    end
    # Returns the rules registered on this class; an empty array if none
    # have been registered.
    #
    def rules
      registered = @__transform_rules
      registered ? registered : []
    end
  end
  # Creates a transform. If a block is given, it is evaluated in the context
  # of the new instance, so that it can call #rule to add instance rules.
  #
  def initialize(&block)
    @rules = []
    instance_eval(&block) if block
  end
  # Adds an instance-level rule, consulted whenever #apply is called on a
  # tree. A rule has two parts:
  #
  # * an *expression pattern*
  # * a *transformation block*
  #
  def rule(expression, &block)
    pattern = Parslet::Pattern.new(expression)
    @rules << [pattern, block]
  end
  # Runs the transformation over obj. Hashes and arrays are descended into
  # first (children before parents); the rebuilt node is then handed to the
  # rules. Any other object is handed to the rules directly. Returns the
  # resulting object. The optional context is passed on to pattern matching.
  #
  def apply(obj, context=nil)
    rebuilt = case obj
      when Hash then recurse_hash(obj, context)
      when Array then recurse_array(obj, context)
      else obj
    end
    transform_elt(rebuilt, context)
  end
  # Runs block with the given bindings. How the block is run depends on its
  # arity:
  #
  # * arity 1: the block is called with the bindings as its only argument
  #   and keeps the environment it was defined in.
  # * otherwise: the block is instance_eval'd on a fresh Context built from
  #   the bindings.
  #
  #   # the local variable a is simulated
  #   t.call_on_match(:a => :b) { a }
  #   # no change of environment here
  #   t.call_on_match(:a => :b) { |d| d[:a] }
  #
  # Returns nil when no block is given.
  #
  def call_on_match(bindings, block)
    return nil unless block
    return block.call(bindings) if block.arity == 1
    Context.new(bindings).instance_eval(&block)
  end
  # All rules that apply to this transform: the ones registered on the class
  # first, followed by the ones added to this instance.
  #
  def rules
    class_level = self.class.rules
    class_level + @rules
  end
  # @api private
  #
  # Tries the rules in order; the first pattern that matches elt decides the
  # result. If no pattern matches, elt is returned as is.
  #
  def transform_elt(elt, context)
    rules.each do |pattern, action|
      bindings = pattern.match(elt, context)
      return call_on_match(bindings, action) if bindings
    end
    elt
  end
  # @api private
  #
  # Builds a new hash with the same keys, each value replaced by the result
  # of applying the transform to it.
  #
  def recurse_hash(hsh, ctx)
    hsh.each_with_object({}) do |(key, value), rebuilt|
      rebuilt[key] = apply(value, ctx)
    end
  end
  # @api private
  #
  # Builds a new array by applying the transform to every element.
  #
  def recurse_array(ary, ctx)
    ary.map do |member|
      apply(member, ctx)
    end
  end
 end
 require 'parslet/context'