vendored parslet, deemed stable enough and better without dependency
This commit is contained in:
parent
6fafeda66d
commit
b1203363d4
@ -1,5 +1,3 @@
|
|||||||
# parslet is assumed to be checked out at the same level as crystal for now
|
|
||||||
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', ".." , "parslet",'lib'))
|
|
||||||
require 'parslet'
|
require 'parslet'
|
||||||
|
|
||||||
require "asm/program"
|
require "asm/program"
|
||||||
|
302
lib/parslet.rb
Normal file
302
lib/parslet.rb
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
# A simple parser generator library. Typical usage would look like this:
|
||||||
|
#
|
||||||
|
# require 'parslet'
|
||||||
|
#
|
||||||
|
# class MyParser < Parslet::Parser
|
||||||
|
# rule(:a) { str('a').repeat }
|
||||||
|
# root(:a)
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
|
||||||
|
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
||||||
|
# # Don't know what to do with bbbb at line 1 char 1.
|
||||||
|
#
|
||||||
|
# The simple DSL allows you to define grammars in PEG-style. This kind of
|
||||||
|
# grammar construction does away with the ambiguities that usually come with
|
||||||
|
# parsers; instead, it allows you to construct grammars that are easier to
|
||||||
|
# debug, since less magic is involved.
|
||||||
|
#
|
||||||
|
# Parslet is typically used in stages:
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# * Parsing the input string; this yields an intermediary tree, see
|
||||||
|
# Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
|
||||||
|
# Parslet::ClassMethods#root.
|
||||||
|
# * Transformation of the tree into something useful to you, see
|
||||||
|
# Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
|
||||||
|
#
|
||||||
|
# The first stage is traditionally intermingled with the second stage; output
|
||||||
|
# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
|
||||||
|
#
|
||||||
|
# The stages are completely decoupled; You can change your grammar around and
|
||||||
|
# use the second stage to isolate the rest of your code from the changes
|
||||||
|
# you've effected.
|
||||||
|
#
|
||||||
|
# == Further reading
|
||||||
|
#
|
||||||
|
# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
|
||||||
|
# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
|
||||||
|
# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
|
||||||
|
# {Parslet::Atoms::Alternative}.
|
||||||
|
#
|
||||||
|
# == When things go wrong
|
||||||
|
#
|
||||||
|
# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
|
||||||
|
# all the details of what went wrong, including a detailed error trace that
|
||||||
|
# can be printed out as an ascii tree. ({Parslet::Cause})
|
||||||
|
#
|
||||||
|
module Parslet
|
||||||
|
# Extends classes that include Parslet with the module
|
||||||
|
# {Parslet::ClassMethods}.
|
||||||
|
#
|
||||||
|
def self.included(base)
  # Ruby calls this hook with the including class/module; extending it with
  # ClassMethods is what makes the class-level `rule` DSL available there.
  base.extend ClassMethods
end
|
||||||
|
|
||||||
|
# Raised when the parse failed to match. It contains the message that should
|
||||||
|
# be presented to the user. More details can be extracted from the
|
||||||
|
# exception's #cause member: it contains an instance of {Parslet::Cause} that
|
||||||
|
# stores all the details of your failed parse in a tree structure.
|
||||||
|
#
|
||||||
|
# begin
|
||||||
|
# parslet.parse(str)
|
||||||
|
# rescue Parslet::ParseFailed => failure
|
||||||
|
# puts failure.cause.ascii_tree
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# Alternatively, you can just require 'parslet/convenience' and call the
|
||||||
|
# method #parse_with_debug instead of #parse. This method will never raise
|
||||||
|
# and print error trees to stdout.
|
||||||
|
#
|
||||||
|
# require 'parslet/convenience'
|
||||||
|
# parslet.parse_with_debug(str)
|
||||||
|
#
|
||||||
|
class ParseFailed < StandardError
  # Why the parse failed.
  #
  # @return [Parslet::Cause]
  attr_reader :cause

  # @param message [String] message to present to the user
  # @param cause [Parslet::Cause, nil] detailed error tree, if one is available
  def initialize(message, cause = nil)
    @cause = cause
    super(message)
  end
end
|
||||||
|
|
||||||
|
module ClassMethods
  # Define an entity for the parser. This generates an instance method of the
  # same name that can be used as part of other patterns. Those methods can
  # be freely mixed in your parser class with real ruby methods.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  # @param name [Symbol] name of the generated method / entity
  # @yield definition of the rule, evaluated in the context of the parser
  #   instance when the entity invokes it
  def rule(name, &definition)
    define_method(name) do
      # Per-instance <name, rule> memoization.
      cache = (@rules ||= {})

      cache.fetch(name) do
        # The proc closes over the parser instance, so the definition is
        # evaluated with the parser as self.
        body = proc { instance_eval(&definition) }
        cache[name] = Atoms::Entity.new(name, &body)
      end
    end
  end
end
|
||||||
|
|
||||||
|
# Allows for delayed construction of #match. See also Parslet.match.
|
||||||
|
#
|
||||||
|
# @api private
|
||||||
|
class DelayedMatchConstructor
  # Wraps +str+ in square brackets and builds a Re atom from the result, so
  # that match['a-z'] behaves like match('[a-z]').
  #
  # @param str [String] contents of the character class (without brackets)
  def [](str)
    char_class = "[" + str + "]"
    Atoms::Re.new(char_class)
  end
end
|
||||||
|
|
||||||
|
# Returns an atom matching a character class. All regular expressions can be
|
||||||
|
# used, as long as they match only a single character at a time.
|
||||||
|
#
|
||||||
|
# match('[ab]') # will match either 'a' or 'b'
|
||||||
|
# match('[\n\s]') # will match newlines and spaces
|
||||||
|
#
|
||||||
|
# There is also another (convenience) form of this method:
|
||||||
|
#
|
||||||
|
# match['a-z'] # synonymous to match('[a-z]')
|
||||||
|
# match['\n'] # synonymous to match('[\n]')
|
||||||
|
#
|
||||||
|
# @overload match(str)
|
||||||
|
# @param str [String] character class to match (regexp syntax)
|
||||||
|
# @return [Parslet::Atoms::Re] a parslet atom
|
||||||
|
#
|
||||||
|
def match(str=nil)
  # Without an argument, hand back the helper that supports match['a-z'].
  str ? Atoms::Re.new(str) : DelayedMatchConstructor.new
end
|
||||||
|
module_function :match
|
||||||
|
|
||||||
|
# Returns an atom matching the +str+ given:
|
||||||
|
#
|
||||||
|
# str('class') # will match 'class'
|
||||||
|
#
|
||||||
|
# @param str [String] string to match verbatim
|
||||||
|
# @return [Parslet::Atoms::Str] a parslet atom
|
||||||
|
#
|
||||||
|
def str(str)
  # Build the atom that matches +str+ verbatim.
  Atoms::Str.new(str)
end
|
||||||
|
module_function :str
|
||||||
|
|
||||||
|
# Returns an atom matching any character. It acts like the '.' (dot)
|
||||||
|
# character in regular expressions.
|
||||||
|
#
|
||||||
|
# any.parse('a') # => 'a'
|
||||||
|
#
|
||||||
|
# @return [Parslet::Atoms::Re] a parslet atom
|
||||||
|
#
|
||||||
|
def any
  # '.' is the regexp wildcard: one arbitrary character.
  Atoms::Re.new('.')
end
|
||||||
|
module_function :any
|
||||||
|
|
||||||
|
# Introduces a new capture scope. This means that all old captures stay
|
||||||
|
# accessible, but new values stored will only be available during the block
|
||||||
|
# given and the old values will be restored after the block.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# # :a will be available until the end of the block. Afterwards,
|
||||||
|
# # :a from the outer scope will be available again, if such a thing
|
||||||
|
# # exists.
|
||||||
|
# scope { str('a').capture(:a) }
|
||||||
|
#
|
||||||
|
def scope(&block)
  # The block is handed over as a Proc object; the Scope atom receives it as
  # a regular argument, not as a block.
  Parslet::Atoms::Scope.new(block)
end
|
||||||
|
module_function :scope
|
||||||
|
|
||||||
|
# Designates a piece of the parser as being dynamic. Dynamic parsers can
|
||||||
|
# either return a parser at runtime, which will be applied on the input, or
|
||||||
|
# return a result from a parse.
|
||||||
|
#
|
||||||
|
# Dynamic parse pieces are never cached and can introduce performance
|
||||||
|
# abnormalities - use sparingly where other constructs fail.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# # Parses either 'a' or 'b', depending on the weather
|
||||||
|
# dynamic { rand() < 0.5 ? str('a') : str('b') }
|
||||||
|
#
|
||||||
|
def dynamic(&block)
  # The block is passed to the Dynamic atom as a Proc argument.
  Parslet::Atoms::Dynamic.new(block)
end
|
||||||
|
module_function :dynamic
|
||||||
|
|
||||||
|
# Returns a parslet atom that parses infix expressions. Operations are
|
||||||
|
# specified as a list of <atom, precedence, associativity> tuples, where
|
||||||
|
# atom is simply the parslet atom that matches an operator, precedence is
|
||||||
|
# a number and associativity is either :left or :right.
|
||||||
|
#
|
||||||
|
# Higher precedence indicates that the operation should bind tighter than
|
||||||
|
# other operations with lower precedence. In common algebra, '+' has
|
||||||
|
# lower precedence than '*'. So you would have a precedence of 1 for '+' and
|
||||||
|
# a precedence of 2 for '*'. Only the order relation between these two
|
||||||
|
# counts, so any number would work.
|
||||||
|
#
|
||||||
|
# Associativity is what decides what interpretation to take for strings that
|
||||||
|
# are ambiguous like '1 + 2 + 3'. If '+' is specified as left associative,
|
||||||
|
# the expression would be interpreted as '(1 + 2) + 3'. If right
|
||||||
|
# associativity is chosen, it would be interpreted as '1 + (2 + 3)'. Note
|
||||||
|
# that the hash trees output reflect that choice as well.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# infix_expression(integer, [add_op, 1, :left])
|
||||||
|
# # would parse things like '1 + 2'
|
||||||
|
#
|
||||||
|
# @param element [Parslet::Atoms::Base] elements that take the NUMBER position
|
||||||
|
# in the expression
|
||||||
|
# @param operations [Array<(Parslet::Atoms::Base, Integer, {:left, :right})>]
|
||||||
|
#
|
||||||
|
# @see Parslet::Atoms::Infix
|
||||||
|
#
|
||||||
|
def infix_expression(element, *operations)
  # All operation tuples are collected into one array and passed along as a
  # single argument.
  Parslet::Atoms::Infix.new(element, operations)
end
|
||||||
|
module_function :infix_expression
|
||||||
|
|
||||||
|
# A special kind of atom that allows embedding whole treetop expressions
|
||||||
|
# into parslet construction.
|
||||||
|
#
|
||||||
|
# # the same as str('a') >> str('b').maybe
|
||||||
|
# exp(%Q("a" "b"?))
|
||||||
|
#
|
||||||
|
# @param str [String] a treetop expression
|
||||||
|
# @return [Parslet::Atoms::Base] the corresponding parslet parser
|
||||||
|
#
|
||||||
|
def exp(str)
  # Parslet::Expression is registered for autoloading in this module and is
  # therefore only loaded on first use.
  expression = Parslet::Expression.new(str)
  expression.to_parslet
end
|
||||||
|
module_function :exp
|
||||||
|
|
||||||
|
# Returns a placeholder for a tree transformation that will only match a
|
||||||
|
# sequence of elements. The +symbol+ you specify will be the key for the
|
||||||
|
# matched sequence in the returned dictionary.
|
||||||
|
#
|
||||||
|
# # This would match a body element that contains several declarations.
|
||||||
|
# { :body => sequence(:declarations) }
|
||||||
|
#
|
||||||
|
# The above example would match <code>:body => ['a', 'b']</code>, but not
|
||||||
|
# <code>:body => 'a'</code>.
|
||||||
|
#
|
||||||
|
# see {Parslet::Transform}
|
||||||
|
#
|
||||||
|
def sequence(symbol)
  # +symbol+ becomes the key under which the matched sequence is bound.
  Pattern::SequenceBind.new(symbol)
end
|
||||||
|
module_function :sequence
|
||||||
|
|
||||||
|
# Returns a placeholder for a tree transformation that will only match
|
||||||
|
# simple elements. This matches everything that <code>#sequence</code>
|
||||||
|
# doesn't match.
|
||||||
|
#
|
||||||
|
# # Matches a single header.
|
||||||
|
# { :header => simple(:header) }
|
||||||
|
#
|
||||||
|
# see {Parslet::Transform}
|
||||||
|
#
|
||||||
|
def simple(symbol)
  # +symbol+ becomes the key under which the matched element is bound.
  Pattern::SimpleBind.new(symbol)
end
|
||||||
|
module_function :simple
|
||||||
|
|
||||||
|
# Returns a placeholder for tree transformation patterns that will match
|
||||||
|
# any kind of subtree.
|
||||||
|
#
|
||||||
|
# { :expression => subtree(:exp) }
|
||||||
|
#
|
||||||
|
def subtree(symbol)
  # +symbol+ becomes the key under which the matched subtree is bound.
  Pattern::SubtreeBind.new(symbol)
end
|
||||||
|
module_function :subtree
|
||||||
|
|
||||||
|
autoload :Expression, 'parslet/expression'
|
||||||
|
end
|
||||||
|
|
||||||
|
require 'parslet/slice'
|
||||||
|
require 'parslet/cause'
|
||||||
|
require 'parslet/source'
|
||||||
|
require 'parslet/atoms'
|
||||||
|
require 'parslet/pattern'
|
||||||
|
require 'parslet/pattern/binding'
|
||||||
|
require 'parslet/transform'
|
||||||
|
require 'parslet/parser'
|
||||||
|
require 'parslet/error_reporter'
|
||||||
|
require 'parslet/scope'
|
161
lib/parslet/accelerator.rb
Normal file
161
lib/parslet/accelerator.rb
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
|
||||||
|
|
||||||
|
# Optimizes the parsers by pattern matching on the parser atoms and replacing
|
||||||
|
# matches with better versions. See the file qed/accelerators.md for a more
|
||||||
|
# in-depth description.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# quote = str('"')
|
||||||
|
# parser = quote >> (quote.absent? >> any).repeat >> quote
|
||||||
|
#
|
||||||
|
# A = Accelerator # for making what follows a bit shorter
|
||||||
|
# optimized_parser = A.apply(parser,
|
||||||
|
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
|
||||||
|
#
|
||||||
|
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
|
||||||
|
#
|
||||||
|
module Parslet::Accelerator
|
||||||
|
|
||||||
|
# An expression to match against a tree of parser atoms. Normally, an
|
||||||
|
# expression is produced by Parslet::Accelerator.any,
|
||||||
|
# Parslet::Accelerator.str or Parslet::Accelerator.re.
|
||||||
|
#
|
||||||
|
# Expressions can be chained much like parslet atoms can be:
|
||||||
|
#
|
||||||
|
# expr.repeat(1) # matching repetition
|
||||||
|
# expr.absent? # matching absent?
|
||||||
|
# expr.present? # matching present?
|
||||||
|
# expr1 >> expr2 # matching a sequence
|
||||||
|
# expr1 | expr2 # matching an alternation
|
||||||
|
#
|
||||||
|
# @see Parslet::Accelerator.str
|
||||||
|
# @see Parslet::Accelerator.re
|
||||||
|
# @see Parslet::Accelerator.any
|
||||||
|
#
|
||||||
|
# @see Parslet::Accelerator
|
||||||
|
#
|
||||||
|
class Expression
  # @return [Symbol] kind of node: :str, :re, :seq, :alt, :absent, :present,
  #   :rep or :as
  attr_reader :type

  # @return [Array] arguments of the node; their meaning depends on #type
  attr_reader :args

  # @param type [Symbol] kind of node
  # @param args [Array] node arguments (sub-expressions, bind variables, ...)
  def initialize(type, *args)
    @type = type
    @args = args
  end

  # Sequence: this expression followed by +other_expr+.
  #
  # @return [Expression]
  def >>(other_expr)
    join_or_new :seq, other_expr
  end

  # Alternation: this expression or +other_expr+.
  #
  # @return [Expression]
  def |(other_expr)
    join_or_new :alt, other_expr
  end

  # @return [Expression] expression wrapping self in an :absent node
  def absent?
    Expression.new(:absent, self)
  end

  # @return [Expression] expression wrapping self in a :present node
  def present?
    Expression.new(:present, self)
  end

  # @param min [Integer] minimum repetition count
  # @param max [Integer, nil] maximum repetition count
  # @return [Expression] a :rep node with args (min, max, self)
  def repeat(min=0, max=nil)
    Expression.new(:rep, min, max, self)
  end

  # @param name [Object] bind variable or value for the name
  # @return [Expression] an :as node with +name+ as its only argument
  def as(name)
    Expression.new(:as, name)
  end

  # Appends +other_expr+ to this expression if it already is a +tag+ node
  # (so that a >> b >> c stays one flat :seq node); otherwise creates a new
  # +tag+ node holding self and +other_expr+.
  #
  # Note that the flattening case modifies the receiver in place and returns
  # it. Previously the result of Array#<< (the args array) leaked out here,
  # which made chains of three or more elements evaluate to an Array instead
  # of an Expression.
  #
  # @api private
  # @return [Expression]
  def join_or_new(tag, other_expr)
    return Expression.new(tag, self, other_expr) unless type == tag

    @args << other_expr
    self
  end
end
|
||||||
|
|
||||||
|
module_function
|
||||||
|
# Returns a match expression that will match `str` parslet atoms.
|
||||||
|
#
|
||||||
|
# @return [Parslet::Accelerator::Expression]
|
||||||
|
#
|
||||||
|
def str(variable, *constraints)
  # A :str node carries the bind variable followed by any constraints.
  Expression.new(:str, variable, *constraints)
end
|
||||||
|
|
||||||
|
# Returns a match expression that will match `match` parslet atoms.
|
||||||
|
#
|
||||||
|
# @return [Parslet::Accelerator::Expression]
|
||||||
|
#
|
||||||
|
def re(variable, *constraints)
  # A :re node carries the bind variable followed by any constraints.
  Expression.new(:re, variable, *constraints)
end
|
||||||
|
|
||||||
|
# Returns a match expression that will match `any` parslet atoms.
|
||||||
|
#
|
||||||
|
# @return [Parslet::Accelerator::Expression]
|
||||||
|
#
|
||||||
|
def any
  # `any` is expressed as a :re node constrained to the literal pattern ".".
  Expression.new(:re, ".")
end
|
||||||
|
|
||||||
|
# Given a parslet atom and an expression, will determine if the expression
|
||||||
|
# matches the atom. If successful, returns the bindings into the pattern
|
||||||
|
# that were made. If no bindings had to be made to make the match successful,
|
||||||
|
# the empty hash is returned.
|
||||||
|
#
|
||||||
|
# @param atom [Parslet::Atoms::Base] parslet atom to match against
|
||||||
|
# @param expr [Parslet::Accelerator::Expression] expression to match
|
||||||
|
# @return [nil, Hash] bindings for the match, nil on failure
|
||||||
|
#
|
||||||
|
def match(atom, expr)
  # A fresh engine per call, so bindings never leak between matches.
  engine = Engine.new

  engine.match(atom, expr) ? engine.bindings : nil
end
|
||||||
|
|
||||||
|
# Constructs an accelerator rule. A rule is a matching expression and the
|
||||||
|
# code that should be executed once the expression could be bound to a
|
||||||
|
# parser.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# Accelerator.rule(Accelerator.any) { Parslet.match('.') }
|
||||||
|
#
|
||||||
|
def rule(expression, &action)
  # A rule is simply the pair of matching expression and action block.
  [expression, action]
end
|
||||||
|
|
||||||
|
# Given a parslet atom and a set of rules, tries to match the rules
|
||||||
|
# recursively through the parslet atom. Once a rule could be matched,
|
||||||
|
# its action block will be called.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# quote = str('"')
|
||||||
|
# parser = quote >> (quote.absent? >> any).repeat >> quote
|
||||||
|
#
|
||||||
|
# A = Accelerator # for making what follows a bit shorter
|
||||||
|
# optimized_parser = A.apply(parser,
|
||||||
|
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
|
||||||
|
#
|
||||||
|
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
|
||||||
|
#
|
||||||
|
# @param atom [Parslet::Atoms::Base] a parser to optimize
|
||||||
|
# @param rules [Array<Array>] rules (expression/action pairs) produced by .rule
|
||||||
|
# @return [Parslet::Atoms::Base] optimized parser
|
||||||
|
#
|
||||||
|
def apply(atom, *rules)
  # The rules are collected into one array and handed to the Application,
  # whose #call produces the result.
  application = Application.new(atom, rules)
  application.call
end
|
||||||
|
end
|
||||||
|
|
||||||
|
require 'parslet/accelerator/engine'
|
||||||
|
require 'parslet/accelerator/application'
|
62
lib/parslet/accelerator/application.rb
Normal file
62
lib/parslet/accelerator/application.rb
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
|
||||||
|
# @api private
|
||||||
|
module Parslet::Accelerator
|
||||||
|
# Visitor that rebuilds a parslet atom tree and gives every rebuilt node to
# the accelerator rules for replacement (see #transform).
class Application
  # @param atom [#accept] root atom to rewrite
  # @param rules [Array<Array>] pairs of (expression, action block)
  def initialize(atom, rules)
    @atom = atom
    @rules = rules
  end

  # Starts the rewrite by visiting the root atom.
  def call
    @atom.accept(self)
  end

  def visit_parser(root)
    transform root.accept(self)
  end

  def visit_entity(name, block)
    # The new entity evaluates (and rewrites) the original block lazily.
    transform Parslet::Atoms::Entity.new(name) { block.call.accept(self) }
  end

  def visit_named(name, atom)
    inner = atom.accept(self)
    transform Parslet::Atoms::Named.new(inner, name)
  end

  def visit_repetition(tag, min, max, atom)
    inner = atom.accept(self)
    transform Parslet::Atoms::Repetition.new(inner, min, max, tag)
  end

  def visit_alternative(alternatives)
    rewritten = alternatives.map { |alternative| alternative.accept(self) }
    transform Parslet::Atoms::Alternative.new(*rewritten)
  end

  def visit_sequence(sequence)
    rewritten = sequence.map { |element| element.accept(self) }
    transform Parslet::Atoms::Sequence.new(*rewritten)
  end

  def visit_lookahead(positive, atom)
    # NOTE(review): unlike the other visitors, the inner atom is passed on
    # without being visited itself - confirm whether that is intended.
    transform Parslet::Atoms::Lookahead.new(atom, positive)
  end

  def visit_re(regexp)
    transform Parslet::Atoms::Re.new(regexp)
  end

  def visit_str(str)
    transform Parslet::Atoms::Str.new(str)
  end

  # Tries each rule in turn against +atom+. The action of the first rule
  # whose expression matches is evaluated in a Parslet::Context built from
  # the match bindings, and its result is returned. If no rule matches,
  # +atom+ itself is returned.
  def transform(atom)
    @rules.each do |expr, action|
      bindings = Parslet::Accelerator.match(atom, expr)
      next unless bindings

      # First match wins: let the rule action produce the replacement.
      return Parslet::Context.new(bindings).instance_eval(&action)
    end

    # Fallback - a clean new parslet atom.
    atom
  end
end
|
||||||
|
end
|
||||||
|
|
||||||
|
require 'parslet/context'
|
112
lib/parslet/accelerator/engine.rb
Normal file
112
lib/parslet/accelerator/engine.rb
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
|
||||||
|
require 'parslet/atoms/visitor'
|
||||||
|
|
||||||
|
module Parslet::Accelerator
|
||||||
|
# @api private
|
||||||
|
# Visitor that checks one atom against one accelerator expression. Each
# visit_* method yields a truthy value when the visited atom matches @expr,
# and false or nil otherwise.
class Apply
  # @param engine [#match, #try_bind] engine holding the bindings
  # @param expr [#type, #args] expression to compare the atom against
  def initialize(engine, expr)
    @engine = engine
    @expr = expr
  end

  # Parser roots and entities never match any expression.
  def visit_parser(root)
    false
  end

  def visit_entity(name, block)
    false
  end

  def visit_named(name, atom)
    match(:as) { |key| @engine.try_bind(key, name) }
  end

  def visit_repetition(tag, min, max, atom)
    match(:rep) do |e_min, e_max, expr|
      e_min == min && e_max == max && @engine.match(atom, expr)
    end
  end

  def visit_alternative(alternatives)
    match(:alt) do |*expressions|
      # Different arity can never match.
      next false unless alternatives.size == expressions.size

      alternatives.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
    end
  end

  def visit_sequence(sequence)
    match(:seq) do |*expressions|
      # Different arity can never match.
      next false unless sequence.size == expressions.size

      sequence.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
    end
  end

  def visit_lookahead(positive, atom)
    # The `return`s leave this method directly: once the expression tag is
    # :absent, the :present check below is never consulted.
    match(:absent) { |expr| return positive == false && @engine.match(atom, expr) }
    match(:present) { |expr| return positive == true && @engine.match(atom, expr) }
  end

  def visit_re(regexp)
    match(:re) do |*bind_conditions|
      bind_conditions.all? { |condition| @engine.try_bind(condition, regexp) }
    end
  end

  def visit_str(str)
    match(:str) do |*bind_conditions|
      bind_conditions.all? { |condition| @engine.try_bind(condition, str) }
    end
  end

  # Yields the arguments of @expr if its type equals +type_tag+ and returns
  # the block result; returns nil without yielding otherwise.
  def match(type_tag)
    yield(*@expr.args) if @expr.type == type_tag
  end
end
|
||||||
|
|
||||||
|
# @api private
|
||||||
|
# Holds the variable bindings collected while one atom is compared against
# one expression.
class Engine
  # @return [Hash] bindings made so far (variable => value)
  attr_reader :bindings

  def initialize
    @bindings = {}
  end

  # Lets +atom+ accept an Apply visitor for +expr+ and returns whatever the
  # visit yields.
  def match(atom, expr)
    visitor = Apply.new(self, expr)
    atom.accept(visitor)
  end

  # Binds +value+ to +variable+, or checks it against an earlier binding.
  #
  # * already bound: true iff +value+ equals the bound value
  # * unbound Symbol: binds it and returns the bound value
  # * anything else: treated as a constraint and checked via ===
  def try_bind(variable, value)
    return value == lookup(variable) if bound?(variable)
    return bind(variable, value) if Symbol === variable

    # This does not look like a variable - match it against the value.
    variable === value
  end

  def bound?(var)
    @bindings.has_key?(var)
  end

  def lookup(var)
    @bindings[var]
  end

  def bind(var, val)
    @bindings[var] = val
  end
end
|
||||||
|
end
|
35
lib/parslet/atoms.rb
Normal file
35
lib/parslet/atoms.rb
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
# This is where parslet's name comes from: Small parser atoms.
|
||||||
|
#
|
||||||
|
module Parslet::Atoms
|
||||||
|
# The precedence module controls parenthesis during the #inspect printing
|
||||||
|
# of parslets. It is not relevant to other aspects of the parsing.
|
||||||
|
#
|
||||||
|
module Precedence
  # Numbered consecutively, starting at one.
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
|
||||||
|
|
||||||
|
require 'parslet/atoms/can_flatten'
|
||||||
|
require 'parslet/atoms/context'
|
||||||
|
require 'parslet/atoms/dsl'
|
||||||
|
require 'parslet/atoms/base'
|
||||||
|
require 'parslet/atoms/named'
|
||||||
|
require 'parslet/atoms/lookahead'
|
||||||
|
require 'parslet/atoms/alternative'
|
||||||
|
require 'parslet/atoms/sequence'
|
||||||
|
require 'parslet/atoms/repetition'
|
||||||
|
require 'parslet/atoms/re'
|
||||||
|
require 'parslet/atoms/str'
|
||||||
|
require 'parslet/atoms/entity'
|
||||||
|
require 'parslet/atoms/capture'
|
||||||
|
require 'parslet/atoms/dynamic'
|
||||||
|
require 'parslet/atoms/scope'
|
||||||
|
require 'parslet/atoms/infix'
|
||||||
|
end
|
||||||
|
|
50
lib/parslet/atoms/alternative.rb
Normal file
50
lib/parslet/atoms/alternative.rb
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
|
||||||
|
# Alternative during matching. Contains a list of parslets that is tried each
|
||||||
|
# one in turn. Only fails if all alternatives fail.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('a') | str('b') # matches either 'a' or 'b'
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  # @return [Array] the alternatives, in the order they are tried
  attr_reader :alternatives

  # Constructs an Alternative instance using all given parslets in the order
  # given. This is what happens if you call '|' on existing parslets, like
  # this:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  # Adds one more alternative. A new, flat Alternative holding all previous
  # alternatives plus +parslet+ is built, rather than a nested tree of
  # two-element Alternatives; this reduces the number of objects created.
  def |(parslet)
    self.class.new(*@alternatives, parslet)
  end

  # Applies the alternatives in order and returns the result of the first
  # one that succeeds. If none does, an error carrying the failure values of
  # all alternatives is produced through the context.
  def try(source, context, consume_all)
    errors = []

    alternatives.each do |alternative|
      outcome = alternative.apply(source, context, consume_all)
      success, value = outcome
      return outcome if success

      # Aggregate all errors.
      errors << value
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, @error_msg, errors)
  end

  precedence ALTERNATE

  # Alternatives are printed separated by ' / '.
  def to_s_inner(prec)
    alternatives.map { |alternative| alternative.to_s(prec) }.join(' / ')
  end
end
|
151
lib/parslet/atoms/base.rb
Normal file
151
lib/parslet/atoms/base.rb
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
# Base class for all parslets, handles orchestration of calls and implements
|
||||||
|
# a lot of the operator and chaining methods.
|
||||||
|
#
|
||||||
|
# Also see Parslet::Atoms::DSL chaining parslet atoms together.
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Base
|
||||||
|
include Parslet::Atoms::Precedence
|
||||||
|
include Parslet::Atoms::DSL
|
||||||
|
include Parslet::Atoms::CanFlatten
|
||||||
|
|
||||||
|
# Given a string or an IO object, this will attempt a parse of its contents
|
||||||
|
# and return a result. If the parse fails, a Parslet::ParseFailed exception
|
||||||
|
# will be thrown.
|
||||||
|
#
|
||||||
|
# @param io [String, Source] input for the parse process
|
||||||
|
# @option options [Parslet::ErrorReporter] :reporter error reporter to use,
|
||||||
|
# defaults to Parslet::ErrorReporter::Tree
|
||||||
|
# @option options [Boolean] :prefix Should a prefix match be accepted?
|
||||||
|
# (default: false)
|
||||||
|
# @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
|
||||||
|
# tree
|
||||||
|
#
|
||||||
|
def parse(io, options={})
|
||||||
|
source = io.respond_to?(:line_and_column) ?
|
||||||
|
io :
|
||||||
|
Parslet::Source.new(io)
|
||||||
|
|
||||||
|
# Try to cheat. Assuming that we'll be able to parse the input, don't
|
||||||
|
# run error reporting code.
|
||||||
|
success, value = setup_and_apply(source, nil, !options[:prefix])
|
||||||
|
|
||||||
|
# If we didn't succeed the parse, raise an exception for the user.
|
||||||
|
# Stack trace will be off, but the error tree should explain the reason
|
||||||
|
# it failed.
|
||||||
|
unless success
|
||||||
|
# Cheating has not paid off. Now pay the cost: Rerun the parse,
|
||||||
|
# gathering error information in the process.
|
||||||
|
reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
|
||||||
|
source.pos = 0
|
||||||
|
success, value = setup_and_apply(source, reporter, !options[:prefix])
|
||||||
|
|
||||||
|
fail "Assertion failed: success was true when parsing with reporter" \
|
||||||
|
if success
|
||||||
|
|
||||||
|
# Value is a Parslet::Cause, which can be turned into an exception:
|
||||||
|
value.raise
|
||||||
|
|
||||||
|
fail "NEVER REACHED"
|
||||||
|
end
|
||||||
|
|
||||||
|
# assert: success is true
|
||||||
|
|
||||||
|
# Extra input is now handled inline with the rest of the parsing. If
|
||||||
|
# really we have success == true, prefix: false and still some input
|
||||||
|
# is left dangling, that is a BUG.
|
||||||
|
if !options[:prefix] && source.chars_left > 0
|
||||||
|
fail "BUG: New error strategy should not reach this point."
|
||||||
|
end
|
||||||
|
|
||||||
|
return flatten(value)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Creates a context for parsing and applies the current atom to the input.
|
||||||
|
# Returns the parse result.
|
||||||
|
#
|
||||||
|
# @return [<Boolean, Object>] Result of the parse. If the first member is
|
||||||
|
# true, the parse has succeeded.
|
||||||
|
def setup_and_apply(source, error_reporter, consume_all)
|
||||||
|
context = Parslet::Atoms::Context.new(error_reporter)
|
||||||
|
apply(source, context, consume_all)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Calls the #try method of this parslet. Success consumes input, error will
|
||||||
|
# rewind the input.
|
||||||
|
#
|
||||||
|
# @param source [Parslet::Source] source to read input from
|
||||||
|
# @param context [Parslet::Atoms::Context] context to use for the parsing
|
||||||
|
# @param consume_all [Boolean] true if the current parse must consume
|
||||||
|
# all input by itself.
|
||||||
|
def apply(source, context, consume_all=false)
|
||||||
|
old_pos = source.pos
|
||||||
|
|
||||||
|
success, value = result = context.try_with_cache(self, source, consume_all)
|
||||||
|
|
||||||
|
if success
|
||||||
|
# If a consume_all parse was made and doesn't result in the consumption
|
||||||
|
# of all the input, that is considered an error.
|
||||||
|
if consume_all && source.chars_left>0
|
||||||
|
# Read 10 characters ahead. Why ten? I don't know.
|
||||||
|
offending_pos = source.pos
|
||||||
|
offending_input = source.consume(10)
|
||||||
|
|
||||||
|
# Rewind input (as happens always in error case)
|
||||||
|
source.pos = old_pos
|
||||||
|
|
||||||
|
return context.err_at(
|
||||||
|
self,
|
||||||
|
source,
|
||||||
|
"Don't know what to do with #{offending_input.to_s.inspect}",
|
||||||
|
offending_pos
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Looks like the parse was successful after all. Don't rewind the input.
|
||||||
|
return result
|
||||||
|
end
|
||||||
|
|
||||||
|
# We only reach this point if the parse has failed. Rewind the input.
|
||||||
|
source.pos = old_pos
|
||||||
|
return result
|
||||||
|
end
|
||||||
|
|
||||||
|
# Override this in your Atoms::Base subclasses to implement parsing
|
||||||
|
# behaviour.
|
||||||
|
#
|
||||||
|
def try(source, context, consume_all)
  # Placeholder only: concrete atoms replace this with their matching logic.
  message = "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, message
end
|
||||||
|
|
||||||
|
# Returns true if this atom can be cached in the packrat cache. Most parslet
|
||||||
|
# atoms are cached, so this always returns true, unless overridden.
|
||||||
|
#
|
||||||
|
def cached?
  # Default answer for every atom. Context#try_with_cache only stores a
  # result when this returns true; Dynamic and Scope override it with false.
  true
end
|
||||||
|
|
||||||
|
# Debug printing - in Treetop syntax.
|
||||||
|
#
|
||||||
|
def self.precedence(prec)
  # Class-level macro: defines an instance method #precedence that returns
  # the value handed in here.
  define_method(:precedence) do
    prec
  end
end
|
||||||
|
precedence BASE
|
||||||
|
def to_s(outer_prec=OUTER)
  # Parenthesise the rendering whenever the surrounding precedence is
  # lower than this atom's own precedence.
  needs_parens = outer_prec < precedence
  rendered = to_s_inner(precedence)

  needs_parens ? "(" + rendered + ")" : rendered
end
|
||||||
|
def inspect
  # Renders through #to_s, explicitly passing OUTER as the surrounding
  # precedence.
  to_s(OUTER)
end
|
||||||
|
private
|
||||||
|
|
||||||
|
# Wraps a result in the success tuple [true, result] and returns it.
|
||||||
|
#
|
||||||
|
def succ(result)
  # Success tuple: a true flag followed by the payload.
  [true, result]
end
|
||||||
|
end
|
137
lib/parslet/atoms/can_flatten.rb
Normal file
137
lib/parslet/atoms/can_flatten.rb
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
|
||||||
|
module Parslet::Atoms
|
||||||
|
# A series of helper functions that have the common topic of flattening
|
||||||
|
# result values into the intermediary tree that consists of Ruby Hashes and
|
||||||
|
# Arrays.
|
||||||
|
#
|
||||||
|
# This module has one main function, #flatten, that takes an annotated
|
||||||
|
# structure as input and returns the reduced form that users expect from
|
||||||
|
# Atom#parse.
|
||||||
|
#
|
||||||
|
# NOTE: Since all of these functions are just that, functions without
|
||||||
|
# side effects, they are in a module and not in a class. It's hard to draw
|
||||||
|
# the line sometimes, but this is beyond.
|
||||||
|
#
|
||||||
|
module CanFlatten
  # Reduces the annotated result of a parslet to the value handed to the
  # user. Annotated results are arrays whose first element is a tag
  # (:sequence, :maybe or :repetition) followed by the child results;
  # anything that is not exactly an Array passes through untouched.
  #
  # named is true when the result will be stored as a Hash value through
  # <code>.as(...)</code>. It changes what an empty :maybe or :repetition
  # collapses to (nil / [] instead of the empty string).
  #
  def flatten(value, named=false)
    return value unless value.instance_of?(Array)

    tag = value.first
    # Children are always flattened as unnamed results.
    children = value.drop(1).map { |child| flatten(child) }

    case tag
    when :sequence   then flatten_sequence(children)
    when :maybe      then named ? children.first : (children.first || '')
    when :repetition then flatten_repetition(children, named)
    else fail "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Left fold that seeds the accumulator with the first element of the
  # list. An empty list folds to the empty string.
  #
  def foldl(list, &block)
    return '' if list.empty?

    head, *rest = list
    rest.inject(head, &block)
  end

  # Flattens the results of a sequence of parslets: nil results are
  # dropped, the remaining ones are merged pairwise from the left.
  #
  # @api private
  #
  def flatten_sequence(list)
    foldl(list.compact) { |merged, element| merge_fold(merged, element) }
  end

  # Merges two already flattened results of a sequence.
  #
  # @api private
  def merge_fold(l, r)
    # Same class on both sides: hashes are merged (the right side wins on
    # duplicate keys, after a warning), everything else is concatenated.
    if l.class == r.class
      return l + r unless l.is_a?(Hash)

      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    l_stringlike = l.respond_to?(:to_str)
    r_stringlike = r.respond_to?(:to_str)

    # Two different stringlike classes: whichever answers to #to_slice wins,
    # the right side being asked first.
    if l_stringlike && r_stringlike
      return r if r.respond_to?(:to_slice)
      return l if l.respond_to?(:to_slice)

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # Exactly one side is stringlike: the other side is kept.
    return l if r_stringlike
    return r if l_stringlike

    # One side is a Hash: it is appended/prepended to the other side.
    return l + [r] if r.class == Hash
    return [l] + r if l.class == Hash

    fail "Unhandled case when foldr'ing sequence."
  end

  # Flattens the results of repeating a single parslet. named tells whether
  # the user has named the result; in that case an empty list stays an
  # empty list instead of becoming the empty string.
  #
  # @api private
  #
  def flatten_repetition(list, named)
    # Keyed subtrees present: keep only those and discard everything in
    # between. To keep the rest as well, it has to be named.
    hashes = list.select { |e| e.instance_of?(Hash) }
    return hashes unless hashes.empty?

    # Nested arrays present: keep only those and pull their elements up
    # one level.
    arrays = list.select { |e| e.instance_of?(Array) }
    return arrays.flatten(1) unless arrays.empty?

    return [] if named && list.empty?

    # Otherwise concatenate whatever is left.
    foldl(list) { |acc, e| acc + e }
  end

  # Source of the 'Duplicate subtrees while merging result' warning, which
  # is emitted when both hashes share at least one key. Adding more
  # '.as(...)' names to the intermediary tree avoids it.
  #
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    return if d.empty?

    warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
         " of the latter will be kept. (keys: #{d.inspect})"
  end
end
|
||||||
|
end
|
38
lib/parslet/atoms/capture.rb
Normal file
38
lib/parslet/atoms/capture.rb
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
# Stores the result of matching an atom against input in the #captures in
|
||||||
|
# parse context. Doing so will allow you to pull parts of the ongoing parse
|
||||||
|
# out later and use them to match other pieces of input.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# # After this, context.captures[:an_a] returns 'a'
|
||||||
|
# str('a').capture(:an_a)
|
||||||
|
#
|
||||||
|
# # Capture and use of the capture: (matches either 'aa' or 'bb')
|
||||||
|
# match['ab'].capture(:first) >>
|
||||||
|
# dynamic { |src, ctx| str(ctx.captures[:first]) }
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Capture < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet the wrapped atom whose result gets captured
  # @param name key under which the result is stored (converted with #to_sym)
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped atom. On success its flattened value is stored in
  # context.captures; the wrapped atom's result is returned as is either way.
  def apply(source, context, consume_all)
    result = parslet.apply(source, context, consume_all)
    success, value = result

    context.captures[name.to_sym] = flatten(value) if success

    result
  end

  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end
|
||||||
|
|
91
lib/parslet/atoms/context.rb
Normal file
91
lib/parslet/atoms/context.rb
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
module Parslet::Atoms
|
||||||
|
# Helper class that implements a transient cache that maps position and
|
||||||
|
# parslet object to results. This is used for memoization in the packrat
|
||||||
|
# style.
|
||||||
|
#
|
||||||
|
# Also, error reporter is stored here and error reporting happens through
|
||||||
|
# this class. This makes the reporting pluggable.
|
||||||
|
#
|
||||||
|
class Context
  # @param reporter [#err, #err_at] Error reporter (leave empty for default
  #   reporter)
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    # Two-level cache: position => { atom => [result, advance] }.
    @cache = Hash.new { |by_pos, pos| by_pos[pos] = {} }
    @reporter = reporter
    @captures = Parslet::Scope.new
  end

  # Runs obj.try at the current source position, memoizing the answer per
  # (position, obj) for atoms that report cached? as true.
  #
  # Next to the result, the cache records how far the source position moved
  # during the attempt, so that a cache hit can move the position by the
  # same amount without reading the input again.
  #
  def try_with_cache(obj, source, consume_all)
    beg = source.pos

    # Cache hit: replay the recorded advance and result.
    if (entry = lookup(obj, beg))
      result, advance = entry
      source.pos = beg + advance
      return result
    end

    # Cache miss: do the real work, remember it if the atom allows that.
    result = obj.try(source, self, consume_all)
    set(obj, beg, [result, source.pos - beg]) if obj.cached?

    result
  end

  # Report an error at a given position. Without a reporter, the error
  # payload is nil.
  # @see ErrorReporter
  #
  def err_at(*args)
    [false, @reporter ? @reporter.err_at(*args) : nil]
  end

  # Report an error. Without a reporter, the error payload is nil.
  # @see ErrorReporter
  #
  def err(*args)
    [false, @reporter ? @reporter.err(*args) : nil]
  end

  # Returns the current captures made on the input (see
  # Parslet::Atoms::Base#capture). Use as follows:
  #
  #   context.captures[:foobar] # => returns capture :foobar
  #
  attr_reader :captures

  # Runs the given block inside a new capture scope: the scope is pushed
  # before the block and popped afterwards, even if the block raises. Use
  # the #scope method of Parslet::Atoms::DSL to call this.
  #
  def scope
    captures.push
    yield
  ensure
    captures.pop
  end

private
  # Cache read for obj at pos; nil when nothing has been stored yet.
  def lookup(obj, pos)
    @cache[pos][obj]
  end

  # Cache write for obj at pos.
  def set(obj, pos, val)
    @cache[pos][obj] = val
  end
end
|
||||||
|
end
|
109
lib/parslet/atoms/dsl.rb
Normal file
109
lib/parslet/atoms/dsl.rb
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
|
||||||
|
# A mixin module that defines operations that can be called on any subclass
|
||||||
|
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
|
||||||
|
# allow combination of parslet atoms to form bigger parsers.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('foo') >> str('bar')
|
||||||
|
# str('f').repeat
|
||||||
|
# any.absent? # also called The Epsilon
|
||||||
|
#
|
||||||
|
module Parslet::Atoms::DSL
  # Builds a Repetition of this atom: at least min and at most max
  # occurrences. Leave max at nil for "no upper bound".
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Builds an optional version of this atom: a Repetition with min 0 and
  # max 1 that is tagged :maybe instead of :repetition.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Builds a Sequence of this atom followed by the given one.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Builds an Alternative of this atom (left side) and the given one
  # (right side).
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Builds a negative Lookahead on this atom: tests for its absence in the
  # input without consuming anything.
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Builds a positive Lookahead on this atom: tests for its presence in the
  # input without consuming anything.
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated spellings of present? and absent?; they will disappear
  # in 2.0.
  #
  alias_method :prsnt?, :present?
  alias_method :absnt?, :absent?

  # Wraps this atom in a Named atom, which puts its result under the given
  # key in the tree output. This must be used to achieve meaningful output
  # from the #parse method.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Wraps this atom in a Capture atom, which stores the result of a
  # successful parse (may be complex) under the given name. Useful for
  # self-referential parses.
  #
  # Example:
  #   str('a').capture(:b)  # will store captures[:b] == 'a'
  #
  def capture(name)
    Parslet::Atoms::Capture.new(self, name)
  end
end
|
32
lib/parslet/atoms/dynamic.rb
Normal file
32
lib/parslet/atoms/dynamic.rb
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# Evaluates a block at parse time. The result from the block must be a parser
|
||||||
|
# (something which implements #apply). In the first case, the parser will then
|
||||||
|
# be applied to the input, creating the result.
|
||||||
|
#
|
||||||
|
# Dynamic parses are never cached.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# dynamic { rand < 0.5 ? str('a') : str('b') }
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
  attr_reader :block

  # @param block [#call] called with (source, context) on every parse
  #   attempt; must return something that implements #apply
  def initialize(block)
    # Run the base class initialisation like the other atom subclasses do.
    super()

    @block = block
  end

  # The block is re-evaluated on every attempt, so its result must never be
  # served from the packrat cache.
  def cached?
    false
  end

  # Evaluates the block and applies the atom it returns to the input.
  #
  # @return whatever the generated atom's #apply returns
  def try(source, context, consume_all)
    atom = block.call(source, context)

    atom.apply(source, context, consume_all)
  end

  def to_s_inner(prec)
    "dynamic { ... }"
  end
end
|
||||||
|
|
41
lib/parslet/atoms/entity.rb
Normal file
41
lib/parslet/atoms/entity.rb
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# This wraps pieces of parslet definition and gives them a name. The wrapped
|
||||||
|
# piece is lazily evaluated and cached. This has two purposes:
|
||||||
|
#
|
||||||
|
# * Avoid infinite recursion during evaluation of the definition
|
||||||
|
# * Be able to print things by their name, not by their sometimes
|
||||||
|
# complicated content.
|
||||||
|
#
|
||||||
|
# You don't normally use this directly; instead you should generate it by
|
||||||
|
# using the structuring method Parslet.rule.
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # @param name name of the rule, used for printing and error messages
  # @param block definition of the rule; only evaluated on first use
  def initialize(name, &block)
    super()

    @name = name
    @block = block
  end

  # Delegates to the (lazily built) wrapped parslet.
  def try(source, context, consume_all)
    parslet.apply(source, context, consume_all)
  end

  # Returns the wrapped parslet, evaluating the definition block on first
  # use and memoizing its result. A block that yields nil/false raises
  # NotImplementedError.
  def parslet
    @parslet ||= begin
      built = @block.call
      raise_not_implemented unless built
      built
    end
  end

  def to_s_inner(prec)
    name.to_s.upcase
  end

private
  # Raises NotImplementedError with a backtrace from which all frames
  # inside this file have been removed (idea taken from dependencies.rb in
  # activesupport).
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    trace = caller.reject { |line| line =~ this_file }

    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    error.set_backtrace(trace)

    raise error
  end
end
|
121
lib/parslet/atoms/infix.rb
Normal file
121
lib/parslet/atoms/infix.rb
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
class Parslet::Atoms::Infix < Parslet::Atoms::Base
  attr_reader :element, :operations

  # @param element atom that matches a single operand
  # @param operations list of [operator_atom, precedence, associativity]
  #   triples, as destructured in #match_operation
  def initialize(element, operations)
    super()

    @element = element
    @operations = operations
  end

  # Parses an infix expression. On success the method returns from inside
  # the block; if #abort throws :error, the thrown error is what the catch
  # (and therefore this method) returns.
  def try(source, context, consume_all)
    catch_error do
      climbed = precedence_climb(source, context, consume_all)
      return succ(produce_tree(climbed))
    end
  end

  # Turns an array of the form ['1', '+', ['2', '*', '3']] into a hash that
  # reflects the same structure. Non-arrays are returned as they are. The
  # array passed in is emptied in the process.
  #
  def produce_tree(ary)
    return ary unless ary.kind_of? Array

    left = ary.shift

    until ary.empty?
      op, right = ary.shift(2)

      # Subexpression -> Subhash
      right = produce_tree(right) if right.kind_of?(Array)
      left = {l: left, o: op, r: right}
    end

    left
  end

  # A precedence climbing algorithm married to parslet, as described here
  # http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
  #
  # @note Error handling in this routine is done by throwing :error and
  #   as a value the error to return to parslet. This avoids cluttering
  #   the recursion logic here with parslet error handling.
  #
  def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
    # Every (sub)expression has to start with one @element.
    success, value = @element.apply(source, context, false)
    unless success
      abort context.err(self, source, "#{@element.inspect} was expected", [value])
    end

    result = [flatten(value, true)]

    # Keep consuming "operator operand" pairs until no operator matches
    # (end of file, end of expression or syntax error) or the operator
    # found binds weaker than what this level is allowed to handle.
    loop do
      op_pos = source.pos
      op_match, prec, assoc = match_operation(source, context, false)

      break unless op_match

      if prec < current_prec
        # Not ours: give the operator back for an outer level to consume.
        source.pos = op_pos
        break
      end

      next_prec = (assoc == :left) ? prec + 1 : prec

      result << op_match
      result << precedence_climb(source, context, consume_all, next_prec, true)
    end

    unwrap(result)
  end

  # A one-element expression is represented by that element alone.
  def unwrap expr
    expr.length == 1 ? expr.first : expr
  end

  # Tries the operator atoms in order and returns
  # [flattened match, precedence, associativity] for the first one that
  # matches, or nil when none does.
  def match_operation(source, context, consume_all)
    @operations.each do |op_atom, prec, assoc|
      success, value = op_atom.apply(source, context, consume_all)
      return flatten(value, true), prec, assoc if success
    end

    nil
  end

  # Leaves the recursion by throwing the given error to #catch_error.
  def abort(error)
    throw :error, error
  end

  # Yields; returns the block's value, or the error thrown by #abort.
  def catch_error
    catch(:error) { yield }
  end

  def to_s_inner(prec)
    ops = @operations.map { |o, _, _| o.inspect }.join(', ')
    "infix_expression(#{@element.inspect}, [#{ops}])"
  end
end
|
49
lib/parslet/atoms/lookahead.rb
Normal file
49
lib/parslet/atoms/lookahead.rb
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Either positive or negative lookahead, doesn't consume its input.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('foo').present? # matches when the input contains 'foo', but leaves it
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # @param bound_parslet atom to look ahead for
  # @param positive true for positive lookahead, false for negative
  def initialize(bound_parslet, positive=true)
    super()

    @positive = positive
    @bound_parslet = bound_parslet

    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Succeeds (with a nil value) when the bound parslet matches and the
  # lookahead is positive, or when it fails and the lookahead is negative.
  # The source position is restored in every case: lookaheads never
  # consume input, not even on success.
  def try(source, context, consume_all)
    pos = source.pos

    success, _value = bound_parslet.apply(source, context, consume_all)

    return succ(nil) if positive ? success : !success

    kind = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[kind], pos)
  ensure
    source.pos = pos
  end

  precedence LOOKAHEAD
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end
end
|
32
lib/parslet/atoms/named.rb
Normal file
32
lib/parslet/atoms/named.rb
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# Names a match to influence tree construction.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('foo') # will return 'foo',
|
||||||
|
# str('foo').as(:foo) # will return :foo => 'foo'
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet the wrapped atom
  # @param name key under which the wrapped atom's value is returned
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped atom. Failures are passed on untouched; a
  # successful value is flattened and wrapped in a one-entry hash.
  def apply(source, context, consume_all)
    result = parslet.apply(source, context, consume_all)
    success, value = result

    success ? succ(produce_return_value(value)) : result
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

private
  # Builds { name => value }, flattening the value as a named result.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
|
38
lib/parslet/atoms/re.rb
Normal file
38
lib/parslet/atoms/re.rb
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# Matches a special kind of regular expression that only ever matches one
|
||||||
|
# character at a time. Useful members of this family are: <code>character
|
||||||
|
# ranges, \\w, \\d, \\r, \\n, ...</code>
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# match('[a-z]') # matches a-z
|
||||||
|
# match('\s') # like regexps: matches space characters
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re

  # @param match pattern source; its #to_s form is compiled as a multiline
  #   regular expression
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(self.match, Regexp::MULTILINE)

    # Note: `match` below is the constructor argument, not the reader.
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Failed to match #{match.inspect[1..-2]}"
    }
  end

  # Consumes exactly one character when the source matches the pattern.
  # Otherwise reports either premature end of input (nothing left to read)
  # or a plain mismatch.
  def try(source, context, consume_all)
    return succ(source.consume(1)) if source.matches?(@re)

    reason = source.chars_left < 1 ? :premature : :failed
    context.err(self, source, @error_msgs[reason])
  end

  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
|
||||||
|
|
83
lib/parslet/atoms/repetition.rb
Normal file
83
lib/parslet/atoms/repetition.rb
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
|
||||||
|
# Matches a parslet repeatedly.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('a').repeat(1,3) # matches 'a' at least once, but at most three times
|
||||||
|
# str('a').maybe # matches 'a' if it is present in the input (repeat(0,1))
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # @param parslet atom to repeat
  # @param min minimum number of matches required
  # @param max maximum number of matches, nil for no limit
  # @param tag first element of the result array, used when flattening
  # @raise [ArgumentError] if max is 0
  def initialize(parslet, min, max, tag=:repetition)
    super()

    if max == 0
      raise ArgumentError,
        "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})"
    end

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}",
      :unconsumed => "Extra input after last repetition"
    }
  end

  # Applies the parslet over and over, collecting the values behind the tag.
  def try(source, context, consume_all)
    accum = [@tag] # tag first (for flattening), matched values after it
    start_pos = source.pos

    # Holds the value of the most recent attempt; after the loop that is
    # the reason why the last attempt failed.
    break_on = nil

    loop do
      success, break_on = parslet.apply(source, context, false)
      break unless success

      accum << break_on

      # With an upper bound, stop as soon as it has been reached.
      return succ(accum) if max && accum.size - 1 >= max
    end

    # The last attempt failed. Was the number of successes enough?
    occ = accum.size - 1
    if occ < min
      return context.err_at(self, source, @error_msgs[:minrep], start_pos, [break_on])
    end

    # consume_all is true, that means that we're inside the part of the
    # parser that should consume the input completely. Leftover input gets
    # its own error so that the failure reason in break_on is not thrown
    # away.
    if consume_all && source.chars_left > 0
      return context.err(self, source, @error_msgs[:unconsumed], [break_on])
    end

    succ(accum)
  end

  precedence REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end
end
|
||||||
|
|
26
lib/parslet/atoms/scope.rb
Normal file
26
lib/parslet/atoms/scope.rb
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Starts a new scope in the parsing process. Please also see the #captures
|
||||||
|
# method.
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Scope < Parslet::Atoms::Base
  attr_reader :block

  # @param block [#call] returns the atom to apply inside the new scope
  def initialize(block)
    super()

    @block = block
  end

  def cached?
    false
  end

  # Evaluates the block inside context.scope and applies the atom it
  # returns; that atom's result is returned from this method.
  def apply(source, context, consume_all)
    context.scope do
      scoped_atom = block.call
      return scoped_atom.apply(source, context, consume_all)
    end
  end

  # Note that printing calls the block as well.
  def to_s_inner(prec)
    "scope { #{block.call.to_s(prec)} }"
  end
end
|
45
lib/parslet/atoms/sequence.rb
Normal file
45
lib/parslet/atoms/sequence.rb
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# A sequence of parslets, matched from left to right. Denoted by '>>'
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# str('a') >> str('b') # matches 'a', then 'b'
|
||||||
|
#
|
||||||
|
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  # @param parslets atoms to match one after the other
  def initialize(*parslets)
    super()

    @parslets = parslets
    @error_msgs = {
      :failed => "Failed to match sequence (#{self.inspect})"
    }
  end

  # Appending to a sequence yields one longer, flat sequence instead of a
  # nested one.
  def >>(parslet)
    self.class.new(*@parslets, parslet)
  end

  # Applies all parslets in order. The result is [:sequence, value, ...];
  # the first failing parslet turns the whole sequence into an error that
  # carries the child's error as its cause.
  def try(source, context, consume_all)
    last_idx = parslets.size - 1
    result = [:sequence]

    parslets.each_with_index do |p, idx|
      # Only the last element may be asked to consume all remaining input.
      success, value = p.apply(source, context, consume_all && (idx == last_idx))

      return context.err(self, source, @error_msgs[:failed], [value]) unless success

      result << value
    end

    succ(result)
  end

  precedence SEQUENCE
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end
end
|
39
lib/parslet/atoms/str.rb
Normal file
39
lib/parslet/atoms/str.rb
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Matches a fixed string of characters.
#
# Example:
#
#   str('foo') # matches 'foo'
#
class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str

  # @param str [#to_s] the text to match. It is converted with #to_s once and
  #   the converted String is used for the pattern, the length and the error
  #   message alike, so non-String arguments (symbols, slices, numbers)
  #   behave exactly like the equivalent String.
  def initialize(str)
    super()

    @str = str.to_s
    @pat = Regexp.new(Regexp.escape(@str))
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed    => "Expected #{@str.inspect}, but got "
    }
  end

  # Consumes @len characters when the source matches the string at the
  # current position; otherwise reports either a premature end of input or
  # the text that was found instead.
  def try(source, context, consume_all)
    return succ(source.consume(@len)) if source.matches?(@pat)

    # Input ending early:
    return context.err(self, source, @error_msgs[:premature]) \
      if source.chars_left < @len

    # Expected something, but got something else instead. The position is
    # captured before consuming the offending text for the message.
    error_pos = source.pos
    return context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  def to_s_inner(prec)
    "'#{str}'"
  end
end
|
||||||
|
|
89
lib/parslet/atoms/visitor.rb
Normal file
89
lib/parslet/atoms/visitor.rb
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# Reopens the parslet atom classes and gives each of them an #accept method
# that calls back the matching #visit_* method on the visitor passed in
# (double dispatch). See parslet/export for an example visitor.
#
module Parslet::Atoms
  class Base
    # Default implementation: atoms without their own #accept cannot be
    # visited.
    def accept(visitor)
      raise NotImplementedError, "No #accept method on #{self.class.name}."
    end
  end

  class Str
    # Calls visitor.visit_str with the string this atom matches.
    def accept(visitor)
      visitor.visit_str(str)
    end
  end

  class Entity
    # Calls visitor.visit_entity with the entity name and its defining block.
    def accept(visitor)
      visitor.visit_entity(name, block)
    end
  end

  class Named
    # Calls visitor.visit_named with the capture name and the wrapped parslet.
    def accept(visitor)
      visitor.visit_named(name, parslet)
    end
  end

  class Sequence
    # Calls visitor.visit_sequence with the list of child parslets.
    def accept(visitor)
      visitor.visit_sequence(parslets)
    end
  end

  class Repetition
    # Calls visitor.visit_repetition with tag, bounds and the repeated parslet.
    def accept(visitor)
      visitor.visit_repetition(@tag, min, max, parslet)
    end
  end

  class Alternative
    # Calls visitor.visit_alternative with the list of alternatives.
    def accept(visitor)
      visitor.visit_alternative(alternatives)
    end
  end

  class Lookahead
    # Calls visitor.visit_lookahead with the polarity flag and the bound
    # parslet.
    def accept(visitor)
      visitor.visit_lookahead(positive, bound_parslet)
    end
  end

  class Re
    # Calls visitor.visit_re with the match expression of this atom.
    def accept(visitor)
      visitor.visit_re(match)
    end
  end
end
|
||||||
|
|
||||||
|
class Parslet::Parser
  # Visitor entry point for a whole parser: hands the root parslet to
  # visitor.visit_parser.
  #
  def accept(visitor)
    visitor.visit_parser(root)
  end
end
|
94
lib/parslet/cause.rb
Normal file
94
lib/parslet/cause.rb
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
require 'stringio' # needed by Cause#ascii_tree

module Parslet
  # Represents a cause why a parse did fail. A lot of these objects are
  # constructed - not all of the causes turn out to be failures for the whole
  # parse.
  #
  class Cause
    # @param message [String, Array] message or list of message pieces
    # @param source [Parslet::Source] source that was being parsed
    # @param pos [Integer] character offset of the error
    # @param children [Array<Parslet::Cause>, nil] causes from deeper levels
    def initialize(message, source, pos, children)
      @message, @source, @pos, @children =
        message, source, pos, children
    end

    # @return [String, Array] A string or an array of message pieces that
    #   provide failure information. Use #to_s to get a formatted string.
    attr_reader :message

    # @return [Parslet::Source] Source that was parsed when this error
    #   happened. Mainly used for line number information.
    attr_reader :source

    # Location of the error.
    #
    # @return [Integer] Position where the error happened. (character offset)
    attr_reader :pos

    # When this cause is part of a tree of error causes: child nodes for this
    # node. Very often carries the reasons for this cause.
    #
    # @return [Array<Parslet::Cause>] A list of reasons for this cause.
    def children
      @children ||= []
    end

    # Builds a new cause. The 'at line LINE char CHAR' suffix is appended
    # lazily, when the cause is turned into a string with #to_s.
    #
    # @param source [Parslet::Source] source that was parsed when this error
    #   happened
    # @param pos [Integer] position of error
    # @param str [String, Array<String>] message parts
    # @param children [Array<Parslet::Cause>] child nodes for this error tree
    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
    #
    def self.format(source, pos, str, children=[])
      self.new(str, source, pos, children)
    end

    # Formats the message followed by the line/column of the error.
    def to_s
      line, column = source.line_and_column(pos)

      # Allow message to be a list of objects. Join them here, since we now
      # really need it. Slice-like parts are shown as inspected strings.
      text = Array(message).map { |part|
        part.respond_to?(:to_slice) ? part.str.inspect : part.to_s
      }.join

      text + " at line #{line} char #{column}."
    end

    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages.
    #
    # @param exception_klass [Class] exception class, built with
    #   (message, cause)
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      Kernel.raise exception
    end

    # Returns an ascii tree representation of the causes of this node and its
    # children.
    #
    def ascii_tree
      StringIO.new.tap { |io|
        recursive_ascii_tree(self, io, [true]) }.
        string
    end

  private
    # Prints +node+ and then, depth first, all of its children. +curved+
    # holds one flag per ancestor level telling whether that level was the
    # last child of its parent.
    def recursive_ascii_tree(node, stream, curved)
      append_prefix(stream, curved)
      stream.puts node.to_s

      # Decide "last child" by position, not by ==, so that children that
      # compare equal are still drawn correctly.
      last_index = node.children.size - 1
      node.children.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end

    # Writes the tree drawing that precedes a node. Every segment is three
    # characters wide so ancestor bars line up with the "|- " / "`- "
    # connectors of deeper levels.
    def append_prefix(stream, curved)
      return if curved.size < 2
      curved[1..-2].each do |c|
        stream.print c ? "   " : "|  "
      end
      stream.print curved.last ? "`- " : "|- "
    end
  end
end
|
33
lib/parslet/context.rb
Normal file
33
lib/parslet/context.rb
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
require 'blankslate'
|
||||||
|
|
||||||
|
# Evaluation context for tree transformations. Every entry of the bindings
# hash becomes both a singleton method and an instance variable on the
# context, so transformation blocks can refer to bindings like locals.
#
# Example:
#
#   ctx = Context.new(:a => :b)
#   ctx.instance_eval do
#     a  # => :b
#   end
#
# @api private
class Parslet::Context < BlankSlate
  # Methods made visible again on top of the blank slate.
  [:methods, :respond_to?, :inspect, :to_s, :instance_variable_set].each do |method_name|
    reveal method_name
  end

  # Defines a singleton method +name+ on this very instance.
  def meta_def(name, &body)
    (class << self; self; end).send(:define_method, name, &body)
  end

  # @param bindings [Hash] binding name => bound value
  def initialize(bindings)
    bindings.each do |binding_name, bound_value|
      meta_def(binding_name.to_sym) { bound_value }
      instance_variable_set("@#{binding_name}", bound_value)
    end
  end
end
|
33
lib/parslet/convenience.rb
Normal file
33
lib/parslet/convenience.rb
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
class Parslet::Atoms::Base

  # Packages the common idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parslet::ParseFailed => error
  #     puts error.cause.ascii_tree
  #   end
  #
  # into a convenient method. On success the parse result is returned; on
  # failure the error tree is printed and the result of +puts+ is returned.
  #
  # Usage:
  #
  #   require 'parslet'
  #   require 'parslet/convenience'
  #
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts={})
    begin
      parse(str, opts)
    rescue Parslet::ParseFailed => failure
      puts failure.cause.ascii_tree
    end
  end

end
|
7
lib/parslet/error_reporter.rb
Normal file
7
lib/parslet/error_reporter.rb
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Namespace that holds all error reporter implementations.
#
module Parslet::ErrorReporter
end
|
||||||
|
|
||||||
|
require 'parslet/error_reporter/tree'
|
||||||
|
require 'parslet/error_reporter/deepest'
|
95
lib/parslet/error_reporter/deepest.rb
Normal file
95
lib/parslet/error_reporter/deepest.rb
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
module Parslet
  module ErrorReporter
    # Instead of reporting the latest error that happens like {Tree} does,
    # this class reports the deepest error. Depth is defined here as how
    # advanced into the input an error happens. The errors close to the
    # greatest depth tend to be more relevant to the end user, since they
    # specify what could be done to make them go away.
    #
    # More specifically, errors produced by this reporter won't be related to
    # the structure of the grammar at all. The positions of the errors will
    # be advanced and convey at every grammar level what the deepest rule
    # was to fail.
    #
    class Deepest
      def initialize
        @deepest_cause = nil
      end

      # Produces an error cause at the current position of +source+ that
      # combines the message at the current level with the errors that
      # happened at a level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        err_at(atom, source, message, source.pos, children)
      end

      # Same as #err, but the error is located at the explicit position
      # +pos+ instead of the current source position.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Integer] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # Returns the cause that is currently deepest. Mainly for specs.
      #
      attr_reader :deepest_cause

      # Checks to see if the lineage of the cause given includes a cause with
      # an error position at least as deep as the current deepest cause
      # stored. If yes, it remembers that leaf and passes the cause through
      # to the caller. If no, it returns the current deepest error that was
      # saved as a reference.
      #
      def deepest(cause)
        _rank, leaf = deepest_child(cause)

        if !deepest_cause || leaf.pos >= deepest_cause.pos
          # This error reaches deeper into the input, save it as reference.
          @deepest_cause = leaf
          return cause
        end

        deepest_cause
      end

    private
      # Returns [rank, leaf]: the node of the given error tree with the
      # biggest rank (nesting level) together with that rank. On ties the
      # first node found wins.
      #
      def deepest_child(cause, rank=0)
        max_child = cause
        max_rank  = rank

        if cause.children && !cause.children.empty?
          cause.children.each do |child|
            c_rank, c_cause = deepest_child(child, rank+1)

            if c_rank > max_rank
              max_rank = c_rank
              max_child = c_cause
            end
          end
        end

        return max_rank, max_child
      end
    end
  end
end
|
57
lib/parslet/error_reporter/tree.rb
Normal file
57
lib/parslet/error_reporter/tree.rb
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
module Parslet
  module ErrorReporter
    # An error reporter has two central methods, one for reporting errors at
    # the current parse position (#err) and one for reporting errors at a
    # given parse position (#err_at). The reporter can return an object (a
    # 'cause') that will be returned to the caller along with the information
    # that the parse failed.
    #
    # When reporting errors on the outer levels of your parser, these methods
    # get passed a list of error objects ('causes') from the inner levels. In
    # this default implementation, the inner levels are considered error
    # subtrees and are appended to the generated tree node at each level,
    # thereby constructing an error tree.
    #
    # This error tree will report in parallel with the grammar structure that
    # failed. A one-to-one correspondence exists between each error in the
    # tree and the parslet atom that produced that error.
    #
    # Implementors are free to use these return values as they see fit. One
    # example would be to return an error state object from these methods
    # that is then updated as errors cascade up the parse derivation tree.
    #
    class Tree
      # Builds a cause located at the current position of +source+.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        Cause.format(source, source.pos, message, children)
      end

      # Builds a cause located at the explicit position +pos+.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Integer] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
end
|
162
lib/parslet/export.rb
Normal file
162
lib/parslet/export.rb
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
# Allows exporting parslet grammars to other lingos.
|
||||||
|
|
||||||
|
require 'set'
|
||||||
|
require 'parslet/atoms/visitor'
|
||||||
|
|
||||||
|
class Parslet::Parser
  module Visitors
    # Renders parslet atoms as Citrus grammar fragments. Every visit_*
    # method returns a freshly built String, so callers may never end up
    # mutating strings that belong to the grammar being exported.
    class Citrus
      attr_reader :context, :output

      # @param context [#deferred, #mangle_name] collaborator that collects
      #   rules to print later and mangles rule names (see PrettyPrinter)
      def initialize(context)
        @context = context
      end

      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end

      def visit_re(match)
        # String#to_s returns the receiver itself; hand out a copy so that
        # nothing downstream can modify the atom's own match string.
        match.to_s.dup
      end

      # Registers the rule for later printing and emits a reference to it.
      def visit_entity(name, block)
        context.deferred(name, block)

        "(#{context.mangle_name(name)})"
      end

      # Capture names have no counterpart in the export; only the wrapped
      # parslet is rendered.
      def visit_named(name, parslet)
        parslet.accept(self)
      end

      def visit_sequence(parslets)
        "(#{parslets.map { |el| el.accept(self) }.join(' ')})"
      end

      def visit_repetition(tag, min, max, parslet)
        # Build a new string instead of appending to the child's result.
        "#{parslet.accept(self)}#{min}*#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' | ')})"
      end

      def visit_lookahead(positive, bound_parslet)
        "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
      end
    end

    # Treetop dialect: differs from Citrus only in the repetition and
    # alternative syntax.
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}..#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' / ')})"
      end
    end
  end

  # A helper class that formats Citrus and Treetop grammars as a string.
  #
  class PrettyPrinter
    attr_reader :visitor

    # @param visitor_klass [Class] visitor class to instantiate; it receives
    #   this printer as its context
    def initialize(visitor_klass)
      @visitor = visitor_klass.new(self)
      # Queue of [name, block] pairs for rules that still need printing.
      @todo = []
    end

    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    def pretty_print(name, parslet)
      output = "grammar #{name}\n"

      output << rule('root', parslet)

      seen = Set.new
      # @todo is constantly filled by the visitor (see #deferred). We keep
      # going until it is empty.
      until @todo.empty?
        rule_name, block = @todo.shift

        # Track what rules we've already seen. This breaks loops.
        next unless seen.add?(rule_name)

        output << rule(rule_name, block.call)
      end

      output << "end\n"
    end

    # Formats a rule in either dialect.
    #
    def rule(name, parslet)
      " rule #{mangle_name name}\n" <<
        " " << parslet.accept(visitor) << "\n" <<
        " end\n"
    end

    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      @todo ||= []
      @todo << [name, content]
    end

    # Mangles names so that Citrus and Treetop can live with it. This mostly
    # transforms some of the things that Ruby allows into other patterns. If
    # there is collision, we will not detect it for now.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end

  # Exports the current parser instance as a string in the Citrus dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    PrettyPrinter.new(Visitors::Citrus).
      pretty_print(self.class.name, root)
  end

  # Exports the current parser instance as a string in the Treetop dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    PrettyPrinter.new(Visitors::Treetop).
      pretty_print(self.class.name, root)
  end
end
|
||||||
|
|
51
lib/parslet/expression.rb
Normal file
51
lib/parslet/expression.rb
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
# Lets you write rules as strings, using the same grammar syntax treetop
# uses (minus the actions). It doubles as an example of a complete parser
# built with parslet and may turn out to be useful in its own right.
#
# This can be viewed as an extension to parslet and might even be hosted in
# its own gem one fine day.
#
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Creates a parslet from a foreign language expression.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  # @param str [String] the expression source
  # @param opts [Hash] :type selects the dialect, defaults to :treetop
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str

    tree = parse(str)
    @parslet = transform(tree)
  end

  # Transforms the parse tree into a parslet expression. When that fails the
  # offending tree is printed as a warning and the error is re-raised.
  #
  def transform(tree)
    begin
      Treetop::Transform.new.apply(tree)
    rescue StandardError
      warn "Could not transform: #{tree.inspect}"
      raise
    end
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet.
  #
  def to_parslet
    @parslet
  end
end
|
92
lib/parslet/expression/treetop.rb
Normal file
92
lib/parslet/expression/treetop.rb
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
class Parslet::Expression::Treetop
  # Grammar for treetop-style expressions (without actions).
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) { alternatives }

    # alternatives: 'a' / 'b'
    rule(:alternatives) do
      (simple >> (spaced('/') >> simple).repeat).as(:alt)
    end

    # a sequence is written by simple concatenation: 'a' 'b'
    rule(:simple) { occurrence.repeat(1).as(:seq) }

    # an atom, optionally followed by an occurrence modifier
    rule(:occurrence) do
      atom.as(:repetition) >> spaced('*').as(:sign) |
        atom.as(:repetition) >> spaced('+').as(:sign) |
        atom.as(:repetition) >> repetition_spec |

        atom.as(:maybe) >> spaced('?') |
        atom
    end

    rule(:atom) do
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
        dot |
        string |
        char_class
    end

    # a character class, e.g. [a-z]; backslash escapes are kept verbatim
    rule(:char_class) do
      (str('[') >>
        (str('\\') >> any |
        str(']').absent? >> any).repeat(1) >>
      str(']')).as(:match) >> space?
    end

    # the dot matches anything at all
    rule(:dot) { spaced('.').as(:any) }

    # single-quoted strings
    rule(:string) do
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absent? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    end

    # repetition specification like {1, 2}; both bounds are optional
    rule(:repetition_spec) do
      spaced('{') >>
        integer.maybe.as(:min) >> spaced(',') >>
        integer.maybe.as(:max) >> spaced('}')
    end

    rule(:integer) { match['0-9'].repeat(1) }

    # whitespace handling
    # NOTE(review): "\s" inside a double-quoted Ruby string is a literal
    # space character, so only ' ' is treated as whitespace here - confirm
    # that tabs/newlines are meant to be excluded.
    rule(:space)  { match("\s").repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional whitespace.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms.
  class Transform < Parslet::Transform

    # 'a'* and 'a'+
    rule(:repetition => simple(:rep), :sign => simple(:sign)) do
      min = (sign == '+') ? 1 : 0
      Parslet::Atoms::Repetition.new(rep, min, nil)
    end

    # 'a'{min, max}; a missing min means 0, a missing max means unbounded
    rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) do
      Parslet::Atoms::Repetition.new(rep,
        Integer(min || 0),
        max ? Integer(max) : nil)
    end

    rule(:alt => subtree(:alt))  { Parslet::Atoms::Alternative.new(*alt) }
    rule(:seq => sequence(:s))   { Parslet::Atoms::Sequence.new(*s) }
    rule(:unwrap => simple(:u))  { u }
    rule(:maybe => simple(:m))   { |d| d[:m].maybe }
    rule(:string => simple(:s))  { Parslet::Atoms::Str.new(s) }
    rule(:match => simple(:m))   { Parslet::Atoms::Re.new(m) }
    rule(:any => simple(:a))     { Parslet::Atoms::Re.new('.') }
  end

end
|
||||||
|
|
97
lib/parslet/graphviz.rb
Normal file
97
lib/parslet/graphviz.rb
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
|
||||||
|
# Paints a graphviz graph of your parser.
|
||||||
|
|
||||||
|
begin
|
||||||
|
require 'ruby-graphviz'
|
||||||
|
rescue LoadError
|
||||||
|
puts "Please install the 'ruby-graphviz' gem first."
|
||||||
|
fail
|
||||||
|
end
|
||||||
|
|
||||||
|
require 'set'
|
||||||
|
require 'parslet/atoms/visitor'
|
||||||
|
|
||||||
|
module Parslet
  # Visitor that walks a parser and records one graph node per rule (entity)
  # plus one edge for every "rule A refers to rule B" relation.
  class GraphvizVisitor
    # @param g graph object; it must respond to add_nodes and add_edges
    def initialize(g)
      @graph = g
      @known_links = Set.new  # [parent, child] pairs already drawn
      @visited = Set.new      # names of entities already expanded
    end

    # Graph node of the rule currently being expanded.
    attr_reader :parent

    def visit_parser(root)
      recurse(root, node('parser'))
    end

    # Adds the node and the edge from the current parent, then expands the
    # entity's body unless that has already happened (breaks cycles).
    def visit_entity(name, block)
      entity_node = node(name)
      downwards(entity_node)

      return unless @visited.add?(name)

      recurse(block.call, entity_node)
    end

    def visit_named(name, atom)
      recurse(atom, parent)
    end

    def visit_repetition(tag, min, max, atom)
      recurse(atom, parent)
    end

    # The parent is captured up front because recursing into one branch
    # changes @parent.
    def visit_alternative(alternatives)
      origin = parent
      alternatives.each { |atom| recurse(atom, origin) }
    end

    def visit_sequence(sequence)
      origin = parent
      sequence.each { |atom| recurse(atom, origin) }
    end

    def visit_lookahead(positive, atom)
      recurse(atom, parent)
    end

    # Leaf atoms are intentionally not drawn.
    def visit_re(regexp)
      # downwards node(regexp.object_id, label: escape("re(#{regexp.inspect})"))
    end

    def visit_str(str)
      # downwards node(str.object_id, label: escape("#{str.inspect}"))
    end

    # Replaces double quotes by single quotes.
    def escape(str)
      str.gsub('"', "'")
    end

    def node(name, opts={})
      @graph.add_nodes(name.to_s, opts)
    end

    # Draws the edge parent -> child once; does nothing without a parent.
    def downwards(child)
      return unless @parent

      link = [@parent, child]
      return if @known_links.include?(link)

      @graph.add_edges(@parent, child)
      @known_links << link
    end

    # Visits +node+ with +current+ installed as the parent.
    def recurse(node, current)
      @parent = current
      node.accept(self)
    end
  end

  # Adds a class-level .graph method that renders the parser as a digraph.
  module Graphable
    # @param opts options handed to the graph's #output unchanged
    def graph(opts)
      g = GraphViz.new(:G, type: :digraph)
      new.accept(GraphvizVisitor.new(g))

      g.output(opts)
    end
  end

  class Parser # reopen for introducing the .graph method
    extend Graphable
  end
end
|
67
lib/parslet/parser.rb
Normal file
67
lib/parslet/parser.rb
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
|
||||||
|
# Base class for your own parsers. Typical use:
#
#   require 'parslet'
#
#   class MyParser < Parslet::Parser
#     rule(:a) { str('a').repeat }
#     root(:a)
#   end
#
#   pp MyParser.new.parse('aaaa')   # => 'aaaa'
#   pp MyParser.new.parse('bbbb')   # => Parslet::ParseFailed:
#                                   #    Don't know what to do with bbbb at line 1 char 1.
#
# A Parslet::Parser is itself a grammar atom, so complete parsers can be
# combined freely with pieces of other parsers.
#
# Example:
#   class ParserA < Parslet::Parser
#     root :aaa
#     rule(:aaa) { str('a').repeat(3,3) }
#   end
#   class ParserB < Parslet::Parser
#     root :expression
#     rule(:expression) { str('b') >> ParserA.new >> str('b') }
#   end
#
# In the above example, ParserB would parse something like 'baaab'.
#
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  # Declares the rule parsing starts from. If you have a rule for 'file'
  # that describes what should be in a file, this would be your root
  # declaration:
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # The declaration defines an instance method #root that returns whatever
  # the method +name+ returns; #try and #to_s_inner below delegate to it, so
  # the parser behaves like that parslet.
  #
  # @param name [Symbol] name of the rule to use as root
  def self.root(name)
    define_method(:root) { send(name) }
  end

  # Delegates matching to the root parslet.
  def try(source, context, consume_all)
    root.try(source, context, consume_all)
  end

  # Delegates the string representation to the root parslet.
  def to_s_inner(prec)
    root.to_s(prec)
  end
end
|
114
lib/parslet/pattern.rb
Normal file
114
lib/parslet/pattern.rb
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
# Matches trees against expressions. Trees are formed by arrays and hashes
|
||||||
|
# for expressing membership and sequence. The leafs of the tree are other
|
||||||
|
# classes.
|
||||||
|
#
|
||||||
|
# A tree issued by the parslet library might look like this:
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# :function_call => {
|
||||||
|
# :name => 'foobar',
|
||||||
|
# :args => [1, 2, 3]
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# A pattern that would match against this tree would be:
|
||||||
|
#
|
||||||
|
# { :function_call => { :name => simple(:name), :args => sequence(:args) }}
|
||||||
|
#
|
||||||
|
# Note that Parslet::Pattern only matches at a given subtree; it won't try
|
||||||
|
# to match recursively. To do that, please use Parslet::Transform.
|
||||||
|
#
|
||||||
|
class Parslet::Pattern
  # @param pattern [Object] the expression that subtrees are matched against;
  #   hashes and arrays are descended into, other objects are compared with
  #   #=== or treated as bind variables if they respond to #can_bind?
  #
  def initialize(pattern)
    @pattern = pattern
  end

  # Decides if the given subtree matches this pattern. Returns the bindings
  # made on a successful match or nil if the match fails. If you specify
  # bindings to be a hash, the mappings in it will be treated like bindings
  # made during an attempted match. The hash you pass in is duplicated and
  # never modified.
  #
  #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
  #
  # @param subtree [String, Hash, Array] poro subtree returned by a parse
  # @param bindings [Hash] variable bindings to be verified
  # @return [Hash, nil] On success: variable bindings that allow a match. On
  #   failure: nil
  #
  def match(subtree, bindings=nil)
    bindings = bindings && bindings.dup || Hash.new
    return bindings if element_match(subtree, @pattern, bindings)
  end

  # Returns true if the tree element given by +tree+ matches the expression
  # given by +exp+. This match must respect bindings already made in
  # +bindings+. Note that bindings is carried along and modified.
  #
  # @api private
  #
  def element_match(tree, exp, bindings)
    # Dispatch on the exact classes of both sides; subclasses of Hash/Array
    # deliberately fall through to the generic branch.
    case [tree.class, exp.class]
    when [Hash, Hash]
      return element_match_hash(tree, exp, bindings)
    when [Array, Array]
      return element_match_ary_single(tree, exp, bindings)
    else
      # If elements match exactly, then that is good enough in all cases
      return true if exp === tree

      # If exp is a bind variable: Check if the binding matches
      if exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
        return element_match_binding(tree, exp, bindings)
      end

      # Otherwise: No match (we don't know anything about the element
      # combination)
      return false
    end
  end

  # Binds +tree+ to the variable named by +exp+, or - if that variable is
  # already bound - verifies that the existing binding equals +tree+.
  #
  # A bind variable whose name is nil never verifies anything; it matches
  # every time and stores the most recent value under the nil key.
  #
  # @api private
  #
  def element_match_binding(tree, exp, bindings)
    var_name = exp.variable_name

    # Check for the presence of the key, not for the truthiness of the value:
    # a variable that was bound to nil or false is still bound and must not
    # be silently rebound to a different value.
    if var_name && bindings.has_key?(var_name)
      return bindings[var_name] == tree
    end

    # New binding:
    bindings.store var_name, tree

    return true
  end

  # Matches two arrays element by element; both must have the same size.
  #
  # @api private
  #
  def element_match_ary_single(sequence, exp, bindings)
    return false if sequence.size != exp.size

    return sequence.zip(exp).all? { |elt, subexp|
      element_match(elt, subexp, bindings) }
  end

  # Matches two hashes: both must have the same number of keys, every key of
  # the pattern must be present in the tree and the values must match.
  #
  # @api private
  #
  def element_match_hash(tree, exp, bindings)
    # Early failure when one hash is bigger than the other
    return false unless exp.size == tree.size

    # We iterate over expected pattern, since we demand that the keys that
    # are there should be in tree as well.
    exp.each do |expected_key, expected_value|
      return false unless tree.has_key? expected_key

      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)
    end

    return true
  end
end
|
49
lib/parslet/pattern/binding.rb
Normal file
49
lib/parslet/pattern/binding.rb
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
|
||||||
|
# Used internally for representing a bind placeholder in a Parslet::Transform
|
||||||
|
# pattern. This is the superclass for all bindings.
|
||||||
|
#
|
||||||
|
# It defines the most permissive kind of bind, the one that matches any subtree
|
||||||
|
# whatever it looks like.
|
||||||
|
#
|
||||||
|
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # Name under which a matched subtree is stored in the bindings.
  #
  def variable_name
    symbol
  end

  # Renders the bind as <code>type(:symbol)</code>, e.g.
  # <code>subtree(:x)</code>.
  #
  def inspect
    "#{bind_type_name}(#{symbol.inspect})"
  end

  # The most permissive bind: accepts every subtree.
  #
  def can_bind?(subtree)
    true
  end

private
  # Derives the short type name from the class name: the part of the name
  # directly in front of 'Bind', downcased.
  #
  def bind_type_name
    # Anonymous classes have a nil name; #to_s turns that into '' so that we
    # end up in the fallback branch instead of raising NoMethodError.
    if md=self.class.name.to_s.match(/(\w+)Bind/)
      md.captures.first.downcase
    else
      # This path should never be used, but since this is for inspection only,
      # let's not raise.
      'unknown_bind'
    end
  end
end
|
||||||
|
|
||||||
|
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
||||||
|
# elements or a collection of attributes.
|
||||||
|
#
|
||||||
|
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
  # Accepts every subtree whose class is neither exactly Hash nor exactly
  # Array.
  #
  def can_bind?(subtree)
    klass = subtree.class
    klass != Hash && klass != Array
  end
end
|
||||||
|
|
||||||
|
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
||||||
|
#
|
||||||
|
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
  # Accepts arrays none of whose elements is of class Hash or Array;
  # everything that is not an array is rejected.
  #
  def can_bind?(subtree)
    return false unless subtree.kind_of?(Array)

    subtree.none? { |el| [Hash, Array].include?(el.class) }
  end
end
|
59
lib/parslet/rig/rspec.rb
Normal file
59
lib/parslet/rig/rspec.rb
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# RSpec matcher that checks whether a parser can parse +input+:
#
#   parser.should parse('foo')
#   parser.should parse('foo').as('foo')
#   parser.should parse('foo').as { |result| result == 'foo' }
#   parser.should parse('foo', :trace => true)
#
# With <code>:trace => true</code> the ascii tree of the failure cause is
# appended to the failure message when parsing fails.
#
RSpec::Matchers.define(:parse) do |input, opts|
  # State shared between the blocks below; +as+ and +block+ are filled in
  # by the #as chain, +result+ and +trace+ by the match block.
  as = block = nil
  result = trace = nil

  unless self.respond_to? :failure_message # if RSpec 2.x
    class << self
      alias_method :failure_message, :failure_message_for_should
      alias_method :failure_message_when_negated, :failure_message_for_should_not
    end
  end

  match do |parser|
    begin
      result = parser.parse(input)
      # A block given to #as decides on its own; otherwise the result must
      # equal the expected output, if one was given at all.
      block ?
        block.call(result) :
        (as == result || as.nil?)
    rescue Parslet::ParseFailed => ex
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  failure_message do |is|
    if block
      "expected output of parsing #{input.inspect}" <<
      " with #{is.inspect} to meet block conditions, but it didn't"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
          "#{is.inspect} to be able to parse #{input.inspect}") <<
        (trace ?
          "\n"+trace :
          '')
    end
  end

  failure_message_when_negated do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} not to equal #{as.inspect}" :

          "#{is.inspect} to not parse #{input.inspect}, but it did")
    end
  end

  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  #
  # The block parameter must not be called +block+: it would shadow the
  # outer local of the same name and the given block would never be stored.
  # +expected_output+ is optional so that the pure block form works.
  chain :as do |expected_output=nil, &my_block|
    as = expected_output
    block = my_block
  end
end
|
42
lib/parslet/scope.rb
Normal file
42
lib/parslet/scope.rb
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# A stack of variable bindings. Lookups that miss in the innermost binding
# continue in the enclosing ones; assignments always go to the innermost.
#
class Parslet::Scope
  # Raised when the accessed slot has never been assigned a value.
  #
  class NotFound < StandardError
  end

  # One level of the scope stack: a hash of slots plus a link to the
  # enclosing binding (nil for the outermost one).
  #
  class Binding
    attr_reader :parent

    def initialize(parent=nil)
      @parent = parent
      @hash = Hash.new
    end

    # Returns the value stored for +k+ in this binding or, failing that, in
    # the closest enclosing binding that has it.
    #
    # @raise [NotFound] if no binding in the chain has a slot for +k+
    #
    def [](k)
      # Test for the key, not for the value: a slot that was assigned nil or
      # false is still assigned and shadows the enclosing bindings.
      return @hash[k] if @hash.has_key?(k)
      return parent[k] if parent

      raise NotFound
    end

    # Stores +v+ under +k+ in this binding only.
    #
    def []=(k,v)
      @hash.store(k,v)
    end
  end

  # Looks up +k+, starting at the innermost binding.
  #
  # @raise [NotFound] if +k+ has never been assigned
  #
  def [](k)
    @current[k]
  end

  # Assigns +v+ to +k+ in the innermost binding.
  #
  def []=(k,v)
    @current[k] = v
  end

  def initialize
    @current = Binding.new
  end

  # Opens a new innermost binding on top of the current one.
  #
  def push
    @current = Binding.new(@current)
  end

  # Discards the innermost binding and makes its parent current again.
  #
  def pop
    @current = @current.parent
  end
end
|
101
lib/parslet/slice.rb
Normal file
101
lib/parslet/slice.rb
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
|
||||||
|
# A slice is a small part from the parse input. A slice mainly behaves like
|
||||||
|
# any other string, except that it remembers where it came from (offset in
|
||||||
|
# original input).
|
||||||
|
#
|
||||||
|
# == Extracting line and column
|
||||||
|
#
|
||||||
|
# Using the #line_and_column method, you can extract the line and column in
|
||||||
|
# the original input where this slice starts.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# slice.line_and_column # => [1, 13]
|
||||||
|
# slice.offset # => 12
|
||||||
|
#
|
||||||
|
# == Likeness to strings
|
||||||
|
#
|
||||||
|
# Parslet::Slice behaves in many ways like a Ruby String. This likeness
|
||||||
|
# however is not complete - many of the myriad of operations String supports
|
||||||
|
# are not yet in Slice. You can always extract the internal string instance by
|
||||||
|
# calling #to_s.
|
||||||
|
#
|
||||||
|
# These omissions are somewhat intentional. Rather than maintaining a full
|
||||||
|
# delegation, we opt for a partial emulation that gets the job done.
|
||||||
|
#
|
||||||
|
class Parslet::Slice
  attr_reader :str, :offset, :line_cache

  # Builds a slice from a string, the offset it was found at and an optional
  # line cache. The line cache has to answer the #line_and_column message.
  #
  def initialize(string, offset, line_cache=nil)
    @str = string
    @offset = offset
    @line_cache = line_cache
  end

  # Compares the wrapped string to +other+, which may be a slice or a string.
  #
  def ==(other)
    str == other
  end

  # Matches the wrapped string against a regular expression.
  #
  def match(regexp)
    str.match(regexp)
  end

  # Size of the wrapped string, as reported by the string itself.
  #
  def size
    str.size
  end

  # Appends +other+ to this slice. The second slice is assumed to begin where
  # this one ends; the result keeps the offset and line cache of this slice.
  #
  def +(other)
    combined = str + other.to_s
    self.class.new(combined, offset, line_cache)
  end

  # Returns a <line, column> tuple referring to the original input.
  #
  # @raise [ArgumentError] if the slice was constructed without a line cache
  #
  def line_and_column
    unless line_cache
      raise ArgumentError, "No line cache was given, cannot infer line and column."
    end

    line_cache.line_and_column(offset)
  end


  # Conversion operators -----------------------------------------------------

  # The wrapped string itself.
  def to_str
    str
  end

  # Same as #to_str.
  def to_s
    str
  end

  def to_slice
    self
  end

  def to_sym
    str.to_sym
  end

  # Strict conversion through Kernel#Integer.
  def to_int
    Integer(str)
  end

  # Conversion through String#to_i.
  def to_i
    str.to_i
  end

  def to_f
    str.to_f
  end

  # Inspection & Debugging ---------------------------------------------------

  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    "#{str.inspect}@#{offset}"
  end
end
|
87
lib/parslet/source.rb
Normal file
87
lib/parslet/source.rb
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
|
||||||
|
require 'stringio'
|
||||||
|
require 'strscan'
|
||||||
|
|
||||||
|
require 'parslet/source/line_cache'
|
||||||
|
|
||||||
|
module Parslet
  # Wraps the input string for parslet.
  #
  class Source
    # @param str [#to_str] the input to parse
    # @raise [ArgumentError] if +str+ does not respond to #to_str
    #
    def initialize(str)
      unless str.respond_to?(:to_str)
        raise ArgumentError, "Must construct Source with a string like object."
      end

      @str = StringScanner.new(str)

      # Lazily built regexps, keyed by repetition count:
      # maps 1 => /./m, 2 => /../m, etc...
      @re_cache = Hash.new { |cache, count|
        cache[count] = /(.|$){#{count}}/m }

      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, str)
    end

    # Checks if the given pattern matches at the current input position.
    # The answer is whatever StringScanner#match? returns.
    #
    # @param pattern [Regexp] pattern to check for
    #
    def matches?(pattern)
      @str.match?(pattern)
    end
    alias_method :match, :matches?

    # Consumes n characters from the input, returning them as a slice of the
    # input. The slice carries the position the scan started at.
    #
    def consume(n)
      started_at = @str.pos
      consumed = @str.scan(@re_cache[n])

      Parslet::Slice.new(consumed, started_at, @line_cache)
    end

    # Returns what is left of the input, as reported by
    # StringScanner#rest_size.
    #
    def chars_left
      @str.rest_size
    end

    # Returns how many chars there are between current position and the
    # string given. If the string given doesn't occur in the source, then
    # the remaining chars (#chars_left) are returned.
    #
    # @return [Integer] count of chars until str or #chars_left
    #
    def chars_until str
      needle = Regexp.new(Regexp.escape(str))
      up_to_needle = @str.check_until(needle)

      up_to_needle ? up_to_needle.size - str.size : chars_left
    end

    # Position of the parse as reported by the underlying StringScanner.
    # @note: Encodings...
    def pos
      @str.pos
    end

    # Moves the parse position to +n+. A RangeError raised by the scanner is
    # swallowed, in which case the position stays where it was.
    #
    def pos=(n)
      begin
        @str.pos = n
      rescue RangeError
      end
    end

    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position
    # given by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || pos)
    end
  end
end
|
96
lib/parslet/source/line_cache.rb
Normal file
96
lib/parslet/source/line_cache.rb
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
|
||||||
|
|
||||||
|
class Parslet::Source
  # A cache for line start positions.
  #
  class LineCache
    def initialize
      # Stores line endings as a simple position number. The first line always
      # starts at 0; numbers beyond the biggest entry are on any line > size,
      # but probably make a scan to that position necessary.
      @line_ends = [].extend(RangeSearch)
    end

    # Returns a <line, column> tuple for the given input position. Both
    # numbers are 1-based.
    #
    def line_and_column(pos)
      eol_idx = @line_ends.lbound(pos)

      if eol_idx.nil?
        # We're beyond the last line end that we know about. Pretend for now
        # that we're just on the last line.
        line_start = @line_ends.last || 0
        line_number = @line_ends.size + 1
      else
        # eol_idx points to the offset that ends the current line; the entry
        # before it (if there is one) is the offset that starts it.
        line_start = eol_idx > 0 ? @line_ends[eol_idx - 1] : 0
        line_number = eol_idx + 1
      end

      [line_number, pos - line_start + 1]
    end

    # Records the position just behind every newline found in +buf+;
    # +start_pos+ is the position of the start of +buf+ in the whole input.
    #
    def scan_for_line_endings(start_pos, buf)
      return unless buf

      scanner = StringScanner.new(buf)
      return unless scanner.exist?(/\n/)

      # If we have already read part or all of buf, we already know about
      # line ends in that portion; skip it instead of searching it again.
      already_scanned = @last_line_end && start_pos < @last_line_end
      scanner.pos = @last_line_end - start_pos if already_scanned

      # Scan the string for line endings; store the positions of all endings
      # in @line_ends.
      while scanner.skip_until(/\n/)
        @last_line_end = start_pos + scanner.pos
        @line_ends << @last_line_end
      end
    end
  end

  # Mixin for arrays that implicitly give a number of ranges, where one range
  # begins where the other one ends.
  #
  #   Example:
  #
  #     [10, 20, 30]
  #     # would describe [0, 10], (10, 20], (20, 30]
  #
  module RangeSearch
    # Index halfway between +left+ and +right+, rounded down.
    #
    def find_mid(left, right)
      # NOTE: Jonathan Hinkle reported that when mathn is required, just
      # dividing and relying on the integer truncation is not enough.
      half_span = ((right - left) / 2).floor
      left + half_span
    end

    # Scans the array for the first number that is > than bound. Returns the
    # index of that number, or nil if there is no such number.
    #
    def lbound(bound)
      return nil if empty? || !(last > bound)

      lo = 0
      hi = size - 1

      loop do
        mid = find_mid(lo, hi)

        if self[mid] > bound
          hi = mid
        else
          # here: self[mid] <= bound
          lo = mid + 1
        end

        return hi if hi <= lo
      end
    end
  end
end
|
236
lib/parslet/transform.rb
Normal file
236
lib/parslet/transform.rb
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
|
||||||
|
require 'parslet/pattern'
|
||||||
|
|
||||||
|
# Transforms an expression tree into something else. The transformation
|
||||||
|
# performs a depth-first, post-order traversal of the expression tree. During
|
||||||
|
# that traversal, each time a rule matches a node, the node is replaced by the
|
||||||
|
# result of the block associated to the rule. Otherwise the node is accepted
|
||||||
|
# as is into the result tree.
|
||||||
|
#
|
||||||
|
# This is almost what you would generally do with a tree visitor, except that
|
||||||
|
# you can match several levels of the tree at once.
|
||||||
|
#
|
||||||
|
# As a consequence of this, the resulting tree will contain pieces of the
|
||||||
|
# original tree and new pieces. Most likely, you will want to transform the
|
||||||
|
# original tree wholly, so this isn't a problem.
|
||||||
|
#
|
||||||
|
# You will not be able to create a loop, given that each node will be replaced
|
||||||
|
# only once and then left alone. This means that the results of a replacement
|
||||||
|
# will not be acted upon.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# class Example < Parslet::Transform
|
||||||
|
# rule(:string => simple(:x)) { # (1)
|
||||||
|
# StringLiteral.new(x)
|
||||||
|
# }
|
||||||
|
# end
|
||||||
|
#
|
||||||
|
# A tree transform (Parslet::Transform) is defined by a set of rules. Each
|
||||||
|
# rule can be defined by calling #rule with the pattern as argument. The block
|
||||||
|
# given will be called every time the rule matches somewhere in the tree given
|
||||||
|
# to #apply. It is passed a Hash containing all the variable bindings of this
|
||||||
|
# pattern match.
|
||||||
|
#
|
||||||
|
# In the above example, (1) illustrates a simple matching rule.
|
||||||
|
#
|
||||||
|
# Let's say you want to parse matching parentheses and distill a maximum nest
|
||||||
|
# depth. You would probably write a parser like the one in example/parens.rb;
|
||||||
|
# here's the relevant part:
|
||||||
|
#
|
||||||
|
# rule(:balanced) {
|
||||||
|
# str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# If you now apply this to a string like '(())', you get an intermediate parse
|
||||||
|
# tree that looks like this:
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# l: '(',
|
||||||
|
# m: {
|
||||||
|
# l: '(',
|
||||||
|
# m: nil,
|
||||||
|
# r: ')'
|
||||||
|
# },
|
||||||
|
# r: ')'
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# This parse tree is good for debugging, but what we would really like to have
|
||||||
|
# is just the nesting depth. This transformation rule will produce that:
|
||||||
|
#
|
||||||
|
# rule(:l => '(', :m => simple(:x), :r => ')') {
|
||||||
|
# # innermost :m will contain nil
|
||||||
|
# x.nil? ? 1 : x+1
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# = Usage patterns
|
||||||
|
#
|
||||||
|
# There are four ways of using this class. The first one is very much
|
||||||
|
# recommended, followed by the second one for generality. The other ones are
|
||||||
|
# omitted here.
|
||||||
|
#
|
||||||
|
# Recommended usage is as follows:
|
||||||
|
#
|
||||||
|
# class MyTransformator < Parslet::Transform
|
||||||
|
# rule(...) { ... }
|
||||||
|
# rule(...) { ... }
|
||||||
|
# # ...
|
||||||
|
# end
|
||||||
|
# MyTransformator.new.apply(tree)
|
||||||
|
#
|
||||||
|
# Alternatively, you can use the Transform class as follows:
|
||||||
|
#
|
||||||
|
# transform = Parslet::Transform.new do
|
||||||
|
# rule(...) { ... }
|
||||||
|
# end
|
||||||
|
# transform.apply(tree)
|
||||||
|
#
|
||||||
|
# = Execution context
|
||||||
|
#
|
||||||
|
# The execution context of action blocks differs depending on the arity of
|
||||||
|
# said blocks. This can be confusing. It is however somewhat intentional. You
|
||||||
|
# should not create fat Transform descendants containing a lot of helper methods,
|
||||||
|
# instead keep your AST class construction in global scope or make it available
|
||||||
|
# through a factory. The following piece of code illustrates usage of global
|
||||||
|
# scope:
|
||||||
|
#
|
||||||
|
# transform = Parslet::Transform.new do
|
||||||
|
# rule(...) { AstNode.new(a_variable) }
|
||||||
|
# rule(...) { Ast.node(a_variable) } # modules are nice
|
||||||
|
# end
|
||||||
|
# transform.apply(tree)
|
||||||
|
#
|
||||||
|
# And here's how you would use a class builder (a factory):
|
||||||
|
#
|
||||||
|
# transform = Parslet::Transform.new do
|
||||||
|
# rule(...) { builder.add_node(a_variable) }
|
||||||
|
# rule(...) { |d| d[:builder].add_node(d[:a_variable]) }
|
||||||
|
# end
|
||||||
|
# transform.apply(tree, :builder => Builder.new)
|
||||||
|
#
|
||||||
|
# As you can see, Transform allows you to inject local context for your rule
|
||||||
|
# action blocks to use.
|
||||||
|
#
|
||||||
|
class Parslet::Transform
  # FIXME: Maybe only part of it? Or maybe only include into constructor
  # context?
  include Parslet

  class << self
    # FIXME: Only do this for subclasses?
    include Parslet

    # Registers a rule on the class itself; such rules are shared by all
    # instances of that class.
    #
    def rule(expression, &block)
      (@__transform_rules ||= []) << [Parslet::Pattern.new(expression), block]
    end

    # The rules registered on this class, an empty array if there are none.
    #
    def rules
      @__transform_rules || []
    end
  end

  # Creates a transform. If a block is given, it is evaluated in the context
  # of the new instance, so that #rule can be called inside of it.
  #
  def initialize(&block)
    @rules = []

    instance_eval(&block) if block
  end

  # Defines a rule to be applied whenever apply is called on a tree. A rule
  # is composed of two parts:
  #
  # * an *expression pattern*
  # * a *transformation block*
  #
  def rule(expression, &block)
    pattern = Parslet::Pattern.new(expression)
    @rules << [pattern, block]
  end

  # Applies the transformation to a tree. Hashes and arrays are transformed
  # from the inside out: first their values/elements, then the rebuilt
  # container itself. The resulting object is returned.
  #
  # @param obj [Object] the tree (or leaf) to transform
  # @param context [Hash, nil] handed to Pattern#match as initial bindings
  #
  def apply(obj, context=nil)
    descended =
      case obj
      when Hash then recurse_hash(obj, context)
      when Array then recurse_array(obj, context)
      else obj
      end

    transform_elt(descended, context)
  end

  # Executes the block on the bindings obtained by Pattern#match. How the
  # block is run depends on its arity:
  #
  #   # arity 1: the block is called with the bindings hash
  #   t.call_on_match({:a => :b}, proc { |d| d[:a] })
  #   # otherwise: the block is instance_eval'ed on a Context created from
  #   # the bindings
  #   t.call_on_match({:a => :b}, proc { a })
  #
  # Returns nil when +block+ is nil.
  #
  def call_on_match(bindings, block)
    return nil unless block
    return block.call(bindings) if block.arity == 1

    Context.new(bindings).instance_eval(&block)
  end

  # All rules that apply to this instance: the ones defined on its class
  # first, followed by the ones defined on the instance.
  #
  def rules
    self.class.rules + @rules
  end

  # Replaces +elt+ by the result of the first rule whose pattern matches it;
  # returns +elt+ itself if no rule matches.
  #
  # @api private
  #
  def transform_elt(elt, context)
    rules.each do |pattern, block|
      bindings = pattern.match(elt, context)
      return call_on_match(bindings, block) if bindings
    end

    elt
  end

  # Builds a new hash with the same keys and transformed values.
  #
  # @api private
  #
  def recurse_hash(hsh, ctx)
    hsh.each_with_object({}) do |(key, value), transformed|
      transformed[key] = apply(value, ctx)
    end
  end

  # Builds a new array of the transformed elements.
  #
  # @api private
  #
  def recurse_array(ary, ctx)
    ary.map { |member| apply(member, ctx) }
  end
end
|
||||||
|
|
||||||
|
require 'parslet/context'
|
Loading…
Reference in New Issue
Block a user