vendored parslet, deemed stable enough and better without dependency
This commit is contained in:
parent
6fafeda66d
commit
b1203363d4
@ -1,5 +1,3 @@
|
||||
# parslet is assumed to be checked out at the same level as crystal for now
|
||||
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', ".." , "parslet",'lib'))
|
||||
require 'parslet'
|
||||
|
||||
require "asm/program"
|
||||
|
302
lib/parslet.rb
Normal file
302
lib/parslet.rb
Normal file
@ -0,0 +1,302 @@
|
||||
# A simple parser generator library. Typical usage would look like this:
|
||||
#
|
||||
# require 'parslet'
|
||||
#
|
||||
# class MyParser < Parslet::Parser
|
||||
# rule(:a) { str('a').repeat }
|
||||
# root(:a)
|
||||
# end
|
||||
#
|
||||
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
|
||||
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
||||
# # Don't know what to do with bbbb at line 1 char 1.
|
||||
#
|
||||
# The simple DSL allows you to define grammars in PEG-style. This kind of
|
||||
# grammar construction does away with the ambiguities that usually come with
|
||||
# parsers; instead, it allows you to construct grammars that are easier to
|
||||
# debug, since less magic is involved.
|
||||
#
|
||||
# Parslet is typically used in stages:
|
||||
#
|
||||
#
|
||||
# * Parsing the input string; this yields an intermediary tree, see
|
||||
# Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
|
||||
# Parslet::ClassMethods#root.
|
||||
# * Transformation of the tree into something useful to you, see
|
||||
# Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
|
||||
#
|
||||
# The first stage is traditionally intermingled with the second stage; output
|
||||
# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
|
||||
#
|
||||
# The stages are completely decoupled; You can change your grammar around and
|
||||
# use the second stage to isolate the rest of your code from the changes
|
||||
# you've effected.
|
||||
#
|
||||
# == Further reading
|
||||
#
|
||||
# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
|
||||
# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
|
||||
# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
|
||||
# {Parslet::Atoms::Alternative}.
|
||||
#
|
||||
# == When things go wrong
|
||||
#
|
||||
# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
|
||||
# all the details of what went wrong, including a detailed error trace that
|
||||
# can be printed out as an ascii tree. ({Parslet::Cause})
|
||||
#
|
||||
module Parslet
|
||||
# Extends classes that include Parslet with the module
|
||||
# {Parslet::ClassMethods}.
|
||||
#
|
||||
def self.included(base)
  # Hook invoked by Ruby on `include Parslet`: give the including class
  # the class-level DSL defined in ClassMethods.
  base.extend ClassMethods
end
|
||||
|
||||
# Raised when the parse failed to match. It contains the message that should
|
||||
# be presented to the user. More details can be extracted from the
|
||||
# exception's #cause member: It contains an instance of {Parslet::Cause} that
|
||||
# stores all the details of your failed parse in a tree structure.
|
||||
#
|
||||
# begin
|
||||
# parslet.parse(str)
|
||||
# rescue Parslet::ParseFailed => failure
|
||||
# puts failure.cause.ascii_tree
|
||||
# end
|
||||
#
|
||||
# Alternatively, you can just require 'parslet/convenience' and call the
|
||||
# method #parse_with_debug instead of #parse. This method will never raise
|
||||
# and print error trees to stdout.
|
||||
#
|
||||
# require 'parslet/convenience'
|
||||
# parslet.parse_with_debug(str)
|
||||
#
|
||||
class ParseFailed < StandardError
  # @param message [String] text presented to the user
  # @param cause [Parslet::Cause, nil] detailed failure tree, if available
  def initialize(message, cause = nil)
    super(message)
    @cause = cause
  end

  # Why the parse failed.
  #
  # @return [Parslet::Cause]
  def cause
    @cause
  end
end
|
||||
|
||||
module ClassMethods
  # Declares a named grammar rule. An instance method called +name+ is
  # generated; it returns an Atoms::Entity wrapping the rule body and can be
  # mixed freely with ordinary Ruby methods in the parser class.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  def rule(name, &definition)
    define_method(name) do
      # Per-instance <name, rule> memoization.
      @rules ||= {}

      @rules.fetch(name) do
        # The block closes over the parser instance, so the rule body is
        # evaluated with the parser as self when the entity asks for it.
        @rules[name] = Atoms::Entity.new(name) { self.instance_eval(&definition) }
      end
    end
  end
end
|
||||
|
||||
# Allows for delayed construction of #match. See also Parslet.match.
|
||||
#
|
||||
# @api private
|
||||
class DelayedMatchConstructor
  # Wraps +str+ in square brackets and builds the regexp atom for that
  # character class, which is what makes match['a-z'] work.
  def [](str)
    char_class = "[" + str + "]"
    Atoms::Re.new(char_class)
  end
end
|
||||
|
||||
# Returns an atom matching a character class. All regular expressions can be
|
||||
# used, as long as they match only a single character at a time.
|
||||
#
|
||||
# match('[ab]') # will match either 'a' or 'b'
|
||||
# match('[\n\s]') # will match newlines and spaces
|
||||
#
|
||||
# There is also another (convenience) form of this method:
|
||||
#
|
||||
# match['a-z'] # synonymous to match('[a-z]')
|
||||
# match['\n'] # synonymous to match('[\n]')
|
||||
#
|
||||
# @overload match(str)
|
||||
# @param str [String] character class to match (regexp syntax)
|
||||
# @return [Parslet::Atoms::Re] a parslet atom
|
||||
#
|
||||
def match(str=nil)
  # Without an argument, hand back the helper that implements match['a-z'].
  str ? Atoms::Re.new(str) : DelayedMatchConstructor.new
end
|
||||
module_function :match
|
||||
|
||||
# Returns an atom matching the +str+ given:
|
||||
#
|
||||
# str('class') # will match 'class'
|
||||
#
|
||||
# @param str [String] string to match verbatim
|
||||
# @return [Parslet::Atoms::Str] a parslet atom
|
||||
#
|
||||
def str(str)
  # Atom that matches +str+ verbatim.
  Atoms::Str.new str
end
|
||||
module_function :str
|
||||
|
||||
# Returns an atom matching any character. It acts like the '.' (dot)
|
||||
# character in regular expressions.
|
||||
#
|
||||
# any.parse('a') # => 'a'
|
||||
#
|
||||
# @return [Parslet::Atoms::Re] a parslet atom
|
||||
#
|
||||
def any
  # A regexp atom for '.', i.e. any single character.
  Atoms::Re.new '.'
end
|
||||
module_function :any
|
||||
|
||||
# Introduces a new capture scope. This means that all old captures stay
|
||||
# accessible, but new values stored will only be available during the block
|
||||
# given and the old values will be restored after the block.
|
||||
#
|
||||
# Example:
|
||||
# # :a will be available until the end of the block. Afterwards,
|
||||
# # :a from the outer scope will be available again, if such a thing
|
||||
# # exists.
|
||||
# scope { str('a').capture(:a) }
|
||||
#
|
||||
def scope(&block)
  # The block is handed to the Scope atom unevaluated.
  Parslet::Atoms::Scope.new block
end
|
||||
module_function :scope
|
||||
|
||||
# Designates a piece of the parser as being dynamic. Dynamic parsers can
|
||||
# either return a parser at runtime, which will be applied on the input, or
|
||||
# return a result from a parse.
|
||||
#
|
||||
# Dynamic parse pieces are never cached and can introduce performance
|
||||
# abnormalities - use sparingly where other constructs fail.
|
||||
#
|
||||
# Example:
|
||||
# # Parses either 'a' or 'b', depending on the weather
|
||||
# dynamic { rand() < 0.5 ? str('a') : str('b') }
|
||||
#
|
||||
def dynamic(&block)
  # The block is handed to the Dynamic atom unevaluated.
  Parslet::Atoms::Dynamic.new block
end
|
||||
module_function :dynamic
|
||||
|
||||
# Returns a parslet atom that parses infix expressions. Operations are
|
||||
# specified as a list of <atom, precedence, associativity> tuples, where
|
||||
# atom is simply the parslet atom that matches an operator, precedence is
|
||||
# a number and associativity is either :left or :right.
|
||||
#
|
||||
# Higher precedence indicates that the operation should bind tighter than
|
||||
# other operations with lower precedence. In common algebra, '+' has
|
||||
# lower precedence than '*'. So you would have a precedence of 1 for '+' and
|
||||
# a precedence of 2 for '*'. Only the order relation between these two
|
||||
# counts, so any number would work.
|
||||
#
|
||||
# Associativity is what decides what interpretation to take for strings that
|
||||
# are ambiguous like '1 + 2 + 3'. If '+' is specified as left associative,
|
||||
# the expression would be interpreted as '(1 + 2) + 3'. If right
|
||||
# associativity is chosen, it would be interpreted as '1 + (2 + 3)'. Note
|
||||
# that the hash trees output reflect that choice as well.
|
||||
#
|
||||
# Example:
|
||||
# infix_expression(integer, [add_op, 1, :left])
|
||||
# # would parse things like '1 + 2'
|
||||
#
|
||||
# @param element [Parslet::Atoms::Base] elements that take the NUMBER position
|
||||
# in the expression
|
||||
# @param operations [Array<(Parslet::Atoms::Base, Integer, {:left, :right})>]
|
||||
#
|
||||
# @see Parslet::Atoms::Infix
|
||||
#
|
||||
def infix_expression(element, *operations)
  # All operation tuples are collected into one array for the Infix atom.
  Parslet::Atoms::Infix.new element, operations
end
|
||||
module_function :infix_expression
|
||||
|
||||
# A special kind of atom that allows embedding whole treetop expressions
|
||||
# into parslet construction.
|
||||
#
|
||||
# # the same as str('a') >> str('b').maybe
|
||||
# exp(%Q("a" "b"?))
|
||||
#
|
||||
# @param str [String] a treetop expression
|
||||
# @return [Parslet::Atoms::Base] the corresponding parslet parser
|
||||
#
|
||||
def exp(str)
  # Build the expression object first, then convert it into a parslet.
  expression = Parslet::Expression.new(str)
  expression.to_parslet
end
|
||||
module_function :exp
|
||||
|
||||
# Returns a placeholder for a tree transformation that will only match a
|
||||
# sequence of elements. The +symbol+ you specify will be the key for the
|
||||
# matched sequence in the returned dictionary.
|
||||
#
|
||||
# # This would match a body element that contains several declarations.
|
||||
# { :body => sequence(:declarations) }
|
||||
#
|
||||
# The above example would match <code>:body => ['a', 'b']</code>, but not
|
||||
# <code>:body => 'a'</code>.
|
||||
#
|
||||
# see {Parslet::Transform}
|
||||
#
|
||||
def sequence(symbol)
  # Placeholder bound under +symbol+ in transformation patterns.
  Pattern::SequenceBind.new symbol
end
|
||||
module_function :sequence
|
||||
|
||||
# Returns a placeholder for a tree transformation that will only match
|
||||
# simple elements. This matches everything that <code>#sequence</code>
|
||||
# doesn't match.
|
||||
#
|
||||
# # Matches a single header.
|
||||
# { :header => simple(:header) }
|
||||
#
|
||||
# see {Parslet::Transform}
|
||||
#
|
||||
def simple(symbol)
  # Placeholder bound under +symbol+ in transformation patterns.
  Pattern::SimpleBind.new symbol
end
|
||||
module_function :simple
|
||||
|
||||
# Returns a placeholder for tree transformation patterns that will match
|
||||
# any kind of subtree.
|
||||
#
|
||||
# { :expression => subtree(:exp) }
|
||||
#
|
||||
def subtree(symbol)
  # Placeholder bound under +symbol+ in transformation patterns.
  Pattern::SubtreeBind.new symbol
end
|
||||
module_function :subtree
|
||||
|
||||
autoload :Expression, 'parslet/expression'
|
||||
end
|
||||
|
||||
require 'parslet/slice'
|
||||
require 'parslet/cause'
|
||||
require 'parslet/source'
|
||||
require 'parslet/atoms'
|
||||
require 'parslet/pattern'
|
||||
require 'parslet/pattern/binding'
|
||||
require 'parslet/transform'
|
||||
require 'parslet/parser'
|
||||
require 'parslet/error_reporter'
|
||||
require 'parslet/scope'
|
161
lib/parslet/accelerator.rb
Normal file
161
lib/parslet/accelerator.rb
Normal file
@ -0,0 +1,161 @@
|
||||
|
||||
|
||||
# Optimizes the parsers by pattern matching on the parser atoms and replacing
|
||||
# matches with better versions. See the file qed/accelerators.md for a more
|
||||
# in-depth description.
|
||||
#
|
||||
# Example:
|
||||
# quote = str('"')
|
||||
# parser = quote >> (quote.absent? >> any).repeat >> quote
|
||||
#
|
||||
# A = Accelerator # for making what follows a bit shorter
|
||||
# optimized_parser = A.apply(parser,
|
||||
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
|
||||
#
|
||||
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
|
||||
#
|
||||
module Parslet::Accelerator
|
||||
|
||||
# An expression to match against a tree of parser atoms. Normally, an
|
||||
# expression is produced by Parslet::Accelerator.any,
|
||||
# Parslet::Accelerator.str or Parslet::Accelerator.re.
|
||||
#
|
||||
# Expressions can be chained much like parslet atoms can be:
|
||||
#
|
||||
# expr.repeat(1) # matching repetition
|
||||
# expr.absent? # matching absent?
|
||||
# expr.present? # matching present?
|
||||
# expr1 >> expr2 # matching a sequence
|
||||
# expr1 | expr2 # matching an alternation
|
||||
#
|
||||
# @see Parslet::Accelerator.str
|
||||
# @see Parslet::Accelerator.re
|
||||
# @see Parslet::Accelerator.any
|
||||
#
|
||||
# @see Parslet::Accelerator
|
||||
#
|
||||
class Expression
  # @return [Symbol] kind of atom this expression matches
  #   (:str, :re, :seq, :alt, :absent, :present, :rep or :as)
  attr_reader :type
  # @return [Array] arguments for the expression; their meaning depends
  #   on #type
  attr_reader :args

  def initialize(type, *args)
    @type = type
    @args = args
  end

  # Matches a sequence: this expression followed by +other_expr+.
  #
  # @return [Expression]
  def >>(other_expr)
    join_or_new :seq, other_expr
  end

  # Matches an alternation of this expression and +other_expr+.
  #
  # @return [Expression]
  def |(other_expr)
    join_or_new :alt, other_expr
  end

  # Matches a negative lookahead wrapping this expression.
  #
  # @return [Expression]
  def absent?
    Expression.new(:absent, self)
  end

  # Matches a positive lookahead wrapping this expression.
  #
  # @return [Expression]
  def present?
    Expression.new(:present, self)
  end

  # Matches a repetition of this expression with the given bounds.
  #
  # @return [Expression]
  def repeat(min=0, max=nil)
    Expression.new(:rep, min, max, self)
  end

  # Matches a named atom.
  #
  # NOTE(review): only +name+ is recorded; the receiver expression is not
  # part of the result, so whatever is being named is not constrained.
  #
  # @return [Expression]
  def as(name)
    Expression.new(:as, name)
  end

  # Appends +other_expr+ to this expression if it already is a +tag+
  # expression (keeping sequences and alternations flat); otherwise wraps
  # both in a new +tag+ expression.
  #
  # Always returns a new Expression and leaves the receiver untouched.
  # Returning the result of `@args << other_expr` would yield an Array and
  # mutate an expression that may be shared, breaking chains of three or
  # more elements such as `a >> b >> c`.
  #
  # @api private
  # @return [Expression]
  def join_or_new(tag, other_expr)
    return Expression.new(tag, *args, other_expr) if type == tag

    Expression.new(tag, self, other_expr)
  end
end
|
||||
|
||||
module_function
|
||||
# Returns a match expression that will match `str` parslet atoms.
|
||||
#
|
||||
# @return [Parslet::Accelerator::Expression]
|
||||
#
|
||||
def str(variable, *constraints)
  # :str expression; args are the variable followed by any constraints.
  Expression.new(:str, *[variable, *constraints])
end
|
||||
|
||||
# Returns a match expression that will match `match` parslet atoms.
|
||||
#
|
||||
# @return [Parslet::Accelerator::Expression]
|
||||
#
|
||||
def re(variable, *constraints)
  # :re expression; args are the variable followed by any constraints.
  Expression.new(:re, *[variable, *constraints])
end
|
||||
|
||||
# Returns a match expression that will match `any` parslet atoms.
|
||||
#
|
||||
# @return [Parslet::Accelerator::Expression]
|
||||
#
|
||||
def any
  # An :re expression constrained to the literal pattern ".".
  Expression.new :re, "."
end
|
||||
|
||||
# Given a parslet atom and an expression, will determine if the expression
|
||||
# matches the atom. If successful, returns the bindings into the pattern
|
||||
# that were made. If no bindings had to be made to make the match successful,
|
||||
# the empty hash is returned.
|
||||
#
|
||||
# @param atom [Parslet::Atoms::Base] parslet atom to match against
|
||||
# @param expr [Parslet::Accelerator::Expression] expression to match
|
||||
# @return [nil, Hash] bindings for the match, nil on failure
|
||||
#
|
||||
def match(atom, expr)
  # A fresh engine per call, so bindings never leak between matches.
  engine = Engine.new

  engine.match(atom, expr) ? engine.bindings : nil
end
|
||||
|
||||
# Constructs an accelerator rule. A rule is a matching expression and the
|
||||
# code that should be executed once the expression could be bound to a
|
||||
# parser.
|
||||
#
|
||||
# Example:
|
||||
# Accelerator.rule(Accelerator.any) { Parslet.match('.') }
|
||||
#
|
||||
def rule(expression, &action)
  # A rule is simply the pair <expression, action block>.
  pair = [expression]
  pair << action
  pair
end
|
||||
|
||||
# Given a parslet atom and a set of rules, tries to match the rules
|
||||
# recursively through the parslet atom. Once a rule could be matched,
|
||||
# its action block will be called.
|
||||
#
|
||||
# Example:
|
||||
# quote = str('"')
|
||||
# parser = quote >> (quote.absent? >> any).repeat >> quote
|
||||
#
|
||||
# A = Accelerator # for making what follows a bit shorter
|
||||
# optimized_parser = A.apply(parser,
|
||||
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
|
||||
#
|
||||
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
|
||||
#
|
||||
# @param atom [Parslet::Atoms::Base] a parser to optimize
|
||||
# @param *rules [Parslet::Accelerator::Rule] rules produced by .rule
|
||||
# @return [Parslet::Atoms::Base] optimized parser
|
||||
#
|
||||
def apply(atom, *rules)
  # Delegate the tree walk to an Application visitor.
  application = Application.new(atom, rules)
  application.call
end
|
||||
end
|
||||
|
||||
require 'parslet/accelerator/engine'
|
||||
require 'parslet/accelerator/application'
|
62
lib/parslet/accelerator/application.rb
Normal file
62
lib/parslet/accelerator/application.rb
Normal file
@ -0,0 +1,62 @@
|
||||
|
||||
# @api private
|
||||
module Parslet::Accelerator
|
||||
# Visitor that rebuilds a parslet atom tree bottom-up and, at every node,
# gives the accelerator rules a chance to replace the rebuilt atom.
class Application
  # @param atom [Parslet::Atoms::Base] root of the parser to optimize
  # @param rules [Array<(Expression, Proc)>] <expression, action> pairs
  def initialize(atom, rules)
    @atom = atom
    @rules = rules
  end

  # Runs the optimization and returns the resulting atom.
  def call
    @atom.accept(self)
  end

  def visit_parser(root)
    transform root.accept(self)
  end

  def visit_entity(name, block)
    # Entities are lazy: the wrapped definition is only visited once the
    # new entity's block is called.
    transform Parslet::Atoms::Entity.new(name) { block.call.accept(self) }
  end

  def visit_named(name, atom)
    transform Parslet::Atoms::Named.new(atom.accept(self), name)
  end

  def visit_repetition(tag, min, max, atom)
    transform Parslet::Atoms::Repetition.new(atom.accept(self), min, max, tag)
  end

  def visit_alternative(alternatives)
    transform Parslet::Atoms::Alternative.new(
      *alternatives.map { |atom| atom.accept(self) })
  end

  def visit_sequence(sequence)
    transform Parslet::Atoms::Sequence.new(
      *sequence.map { |atom| atom.accept(self) })
  end

  def visit_lookahead(positive, atom)
    # Recurse into the child like every other composite visitor does, so
    # that rules also apply to atoms nested below a lookahead.
    transform Parslet::Atoms::Lookahead.new(atom.accept(self), positive)
  end

  def visit_re(regexp)
    transform Parslet::Atoms::Re.new(regexp)
  end

  def visit_str(str)
    transform Parslet::Atoms::Str.new(str)
  end

  # Tries each rule in order against +atom+. The first rule whose
  # expression matches has its action evaluated in a Parslet::Context built
  # from the match bindings, and the action's result replaces the atom.
  # Without a match the freshly built atom is returned unchanged.
  def transform(atom)
    @rules.each do |expr, action|
      # Named `bindings` so the local does not shadow Kernel#binding.
      bindings = Parslet::Accelerator.match(atom, expr)
      next unless bindings

      return Parslet::Context.new(bindings).instance_eval(&action)
    end

    atom
  end
end
|
||||
end
|
||||
|
||||
require 'parslet/context'
|
112
lib/parslet/accelerator/engine.rb
Normal file
112
lib/parslet/accelerator/engine.rb
Normal file
@ -0,0 +1,112 @@
|
||||
|
||||
require 'parslet/atoms/visitor'
|
||||
|
||||
module Parslet::Accelerator
|
||||
# @api private
|
||||
# Visitor that checks a single atom against a single expression; the
# engine supplies recursion and variable binding.
class Apply
  def initialize(engine, expr)
    @engine = engine
    @expr = expr
  end

  # Parsers and entities never match an expression.
  def visit_parser(root)
    false
  end

  def visit_entity(name, block)
    false
  end

  def visit_named(name, atom)
    match(:as) { |key| @engine.try_bind(key, name) }
  end

  def visit_repetition(tag, min, max, atom)
    match(:rep) do |wanted_min, wanted_max, inner|
      wanted_min == min && wanted_max == max && @engine.match(atom, inner)
    end
  end

  def visit_alternative(alternatives)
    match(:alt) do |*expressions|
      alternatives.size == expressions.size &&
        alternatives.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
    end
  end

  def visit_sequence(sequence)
    match(:seq) do |*expressions|
      sequence.size == expressions.size &&
        sequence.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
    end
  end

  def visit_lookahead(positive, atom)
    # Each block returns from the whole method when its tag matches; if
    # neither tag matches, the result is nil.
    match(:absent) { |expr| return positive == false && @engine.match(atom, expr) }
    match(:present) { |expr| return positive == true && @engine.match(atom, expr) }
  end

  def visit_re(regexp)
    match(:re) do |*bind_conditions|
      bind_conditions.all? { |condition| @engine.try_bind(condition, regexp) }
    end
  end

  def visit_str(str)
    match(:str) do |*bind_conditions|
      bind_conditions.all? { |condition| @engine.try_bind(condition, str) }
    end
  end

  # Yields the expression's args if its type equals +type_tag+; returns
  # nil (without yielding) otherwise.
  def match(type_tag)
    return unless @expr.type == type_tag

    yield(*@expr.args)
  end
end
|
||||
|
||||
# @api private
|
||||
# Holds the variable bindings accumulated while an expression is matched
# against an atom tree.
class Engine
  attr_reader :bindings

  def initialize
    @bindings = {}
  end

  # Matches +atom+ against +expr+ by sending an Apply visitor to the atom.
  def match(atom, expr)
    visitor = Apply.new(self, expr)
    atom.accept(visitor)
  end

  # Binds +variable+ to +value+ or checks an existing binding.
  #
  # * already bound: the result is whether the stored value equals +value+
  # * unbound Symbol: the value is stored and returned
  # * anything else: treated as a constraint and compared using ===
  def try_bind(variable, value)
    return value == lookup(variable) if bound?(variable)
    return bind(variable, value) if Symbol === variable

    # This does not look like a variable - match it against the value.
    variable === value
  end

  def bound?(var)
    @bindings.has_key?(var)
  end

  def lookup(var)
    @bindings[var]
  end

  def bind(var, val)
    @bindings[var] = val
  end
end
|
||||
end
|
35
lib/parslet/atoms.rb
Normal file
35
lib/parslet/atoms.rb
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
# This is where parslet's name comes from: Small parser atoms.
|
||||
#
|
||||
module Parslet::Atoms
|
||||
# The precedence module controls parenthesis during the #inspect printing
|
||||
# of parslets. It is not relevant to other aspects of the parsing.
|
||||
#
|
||||
module Precedence
  # Ascending precedence levels; the lowest is BASE, the highest OUTER.
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
|
||||
|
||||
require 'parslet/atoms/can_flatten'
|
||||
require 'parslet/atoms/context'
|
||||
require 'parslet/atoms/dsl'
|
||||
require 'parslet/atoms/base'
|
||||
require 'parslet/atoms/named'
|
||||
require 'parslet/atoms/lookahead'
|
||||
require 'parslet/atoms/alternative'
|
||||
require 'parslet/atoms/sequence'
|
||||
require 'parslet/atoms/repetition'
|
||||
require 'parslet/atoms/re'
|
||||
require 'parslet/atoms/str'
|
||||
require 'parslet/atoms/entity'
|
||||
require 'parslet/atoms/capture'
|
||||
require 'parslet/atoms/dynamic'
|
||||
require 'parslet/atoms/scope'
|
||||
require 'parslet/atoms/infix'
|
||||
end
|
||||
|
50
lib/parslet/atoms/alternative.rb
Normal file
50
lib/parslet/atoms/alternative.rb
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
# Alternative during matching. Contains a list of parslets that is tried each
|
||||
# one in turn. Only fails if all alternatives fail.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('a') | str('b') # matches either 'a' or 'b'
|
||||
#
|
||||
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # Builds an Alternative from the given parslets, kept in the order
  # given. This is what calling '|' on existing parslets produces:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  #---
  # Keep the alternatives in one flat list instead of building a hanging
  # tree of Alternative parslets. This reduces the number of objects created.
  #+++
  def |(parslet)
    self.class.new(*@alternatives, parslet)
  end

  # Applies each alternative in turn and returns the first successful
  # result. If none succeeds, reports an error carrying the failure value
  # of every alternative.
  def try(source, context, consume_all)
    errors = []

    alternatives.each do |alternative|
      result = alternative.apply(source, context, consume_all)
      success, value = result
      return result if success

      # Aggregate all errors
      errors << value
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, @error_msg, errors)
  end

  precedence ALTERNATE
  def to_s_inner(prec)
    rendered = alternatives.map { |alternative| alternative.to_s(prec) }
    rendered.join(' / ')
  end
end
|
151
lib/parslet/atoms/base.rb
Normal file
151
lib/parslet/atoms/base.rb
Normal file
@ -0,0 +1,151 @@
|
||||
# Base class for all parslets, handles orchestration of calls and implements
|
||||
# a lot of the operator and chaining methods.
|
||||
#
|
||||
# Also see Parslet::Atoms::DSL chaining parslet atoms together.
|
||||
#
|
||||
class Parslet::Atoms::Base
|
||||
include Parslet::Atoms::Precedence
|
||||
include Parslet::Atoms::DSL
|
||||
include Parslet::Atoms::CanFlatten
|
||||
|
||||
# Given a string or an IO object, this will attempt a parse of its contents
|
||||
# and return a result. If the parse fails, a Parslet::ParseFailed exception
|
||||
# will be thrown.
|
||||
#
|
||||
# @param io [String, Source] input for the parse process
|
||||
# @option options [Parslet::ErrorReporter] :reporter error reporter to use,
|
||||
# defaults to Parslet::ErrorReporter::Tree
|
||||
# @option options [Boolean] :prefix Should a prefix match be accepted?
|
||||
# (default: false)
|
||||
# @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
|
||||
# tree
|
||||
#
|
||||
def parse(io, options={})
  # Anything that already behaves like a source is used as is.
  source = io.respond_to?(:line_and_column) ? io : Parslet::Source.new(io)
  consume_all = !options[:prefix]

  # Optimistic first pass: no error reporter, so no error bookkeeping.
  success, value = setup_and_apply(source, nil, consume_all)

  unless success
    # The optimistic pass failed. Parse again from the start, this time
    # gathering error information in the process.
    reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
    source.pos = 0
    success, value = setup_and_apply(source, reporter, consume_all)

    if success
      fail "Assertion failed: success was true when parsing with reporter"
    end

    # Value is a Parslet::Cause, which can be turned into an exception:
    value.raise

    fail "NEVER REACHED"
  end

  # From here on the parse has succeeded. Leftover input is handled inside
  # #apply, so finding some here with prefix: false is a BUG.
  if consume_all && source.chars_left > 0
    fail "BUG: New error strategy should not reach this point."
  end

  flatten(value)
end
|
||||
|
||||
# Creates a context for parsing and applies the current atom to the input.
|
||||
# Returns the parse result.
|
||||
#
|
||||
# @return [<Boolean, Object>] Result of the parse. If the first member is
|
||||
# true, the parse has succeeded.
|
||||
def setup_and_apply(source, error_reporter, consume_all)
  # Each call gets a fresh context built around the given reporter.
  apply(source, Parslet::Atoms::Context.new(error_reporter), consume_all)
end
|
||||
|
||||
# Calls the #try method of this parslet. Success consumes input, error will
|
||||
# rewind the input.
|
||||
#
|
||||
# @param source [Parslet::Source] source to read input from
|
||||
# @param context [Parslet::Atoms::Context] context to use for the parsing
|
||||
# @param consume_all [Boolean] true if the current parse must consume
|
||||
# all input by itself.
|
||||
def apply(source, context, consume_all=false)
  start_pos = source.pos

  result = context.try_with_cache(self, source, consume_all)
  success, _value = result

  unless success
    # The parse has failed. Rewind the input.
    source.pos = start_pos
    return result
  end

  # Success: keep the consumed input, unless a consume_all parse left
  # something behind - that is considered an error.
  return result unless consume_all && source.chars_left>0

  # Read 10 characters ahead for the error message.
  offending_pos = source.pos
  offending_input = source.consume(10)

  # Rewind input (as happens always in error case)
  source.pos = start_pos

  context.err_at(
    self,
    source,
    "Don't know what to do with #{offending_input.to_s.inspect}",
    offending_pos
  )
end
|
||||
|
||||
# Override this in your Atoms::Base subclasses to implement parsing
|
||||
# behaviour.
|
||||
#
|
||||
def try(source, context, consume_all)
  # Abstract hook: concrete atoms must supply their own implementation.
  message = "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  raise NotImplementedError, message
end
|
||||
|
||||
# Returns true if this atom can be cached in the packrat cache. Most parslet
|
||||
# atoms are cached, so this always returns true, unless overridden.
|
||||
#
|
||||
def cached?
  # Caching is the default; subclasses override this to opt out.
  true
end
|
||||
|
||||
# Debug printing - in Treetop syntax.
|
||||
#
|
||||
def self.precedence(prec)
  # Class-level macro: defines an instance method #precedence that
  # returns the given constant.
  define_method(:precedence) do
    prec
  end
end
|
||||
precedence BASE
|
||||
def to_s(outer_prec=OUTER)
  # Parenthesize whenever the surrounding precedence is lower than ours.
  needs_parens = outer_prec < precedence
  inner = to_s_inner(precedence)

  needs_parens ? "("+inner+")" : inner
end
|
||||
def inspect
  # Same rendering as #to_s at the outermost precedence level.
  to_s OUTER
end
|
||||
private
|
||||
|
||||
# Produces an instance of Success and returns it.
|
||||
#
|
||||
def succ(result)
  # Success is the pair <true, result>.
  success = true
  [success, result]
end
|
||||
end
|
137
lib/parslet/atoms/can_flatten.rb
Normal file
137
lib/parslet/atoms/can_flatten.rb
Normal file
@ -0,0 +1,137 @@
|
||||
|
||||
module Parslet::Atoms
|
||||
# A series of helper functions that have the common topic of flattening
|
||||
# result values into the intermediary tree that consists of Ruby Hashes and
|
||||
# Arrays.
|
||||
#
|
||||
# This module has one main function, #flatten, that takes an annotated
|
||||
# structure as input and returns the reduced form that users expect from
|
||||
# Atom#parse.
|
||||
#
|
||||
# NOTE: Since all of these functions are just that, functions without
|
||||
# side effects, they are in a module and not in a class. It's hard to draw
|
||||
# the line sometimes, but this is beyond.
|
||||
#
|
||||
module CanFlatten
|
||||
# Takes a mixed value coming out of a parslet and converts it to a return
|
||||
# value for the user by dropping things and merging hashes.
|
||||
#
|
||||
# Named is set to true if this result will be embedded in a Hash result from
|
||||
# naming something using <code>.as(...)</code>. It changes the folding
|
||||
# semantics of repetition.
|
||||
#
|
||||
def flatten(value, named=false)
  # Passes through everything that isn't an array of things
  return value unless value.instance_of? Array

  # The head is the s-expression tag; the rest are its children, each of
  # which is flattened first (always un-named).
  tag, *children = value
  flat = children.map { |child| flatten(child) }

  case tag
  when :sequence
    flatten_sequence(flat)
  when :maybe
    # A missing optional becomes '' unless the result is named.
    named ? flat.first : (flat.first || '')
  when :repetition
    flatten_repetition(flat, named)
  else
    fail "BUG: Unknown tag #{tag.inspect}."
  end
end
|
||||
|
||||
# Lisp style fold left where the first element builds the basis for
|
||||
# an inject.
|
||||
#
|
||||
def foldl(list, &block)
|
||||
return '' if list.empty?
|
||||
list[1..-1].inject(list.first, &block)
|
||||
end
|
||||
|
||||
# Flatten results from a sequence of parslets.
|
||||
#
|
||||
# @api private
|
||||
#
|
||||
def flatten_sequence(list)
|
||||
foldl(list.compact) { |r, e| # and then merge flat elements
|
||||
merge_fold(r, e)
|
||||
}
|
||||
end
|
||||
# @api private
|
||||
def merge_fold(l, r)
|
||||
# equal pairs: merge. ----------------------------------------------------
|
||||
if l.class == r.class
|
||||
if l.is_a?(Hash)
|
||||
warn_about_duplicate_keys(l, r)
|
||||
return l.merge(r)
|
||||
else
|
||||
return l + r
|
||||
end
|
||||
end
|
||||
|
||||
# unequal pairs: hoist to same level. ------------------------------------
|
||||
|
||||
# Maybe classes are not equal, but both are stringlike?
|
||||
if l.respond_to?(:to_str) && r.respond_to?(:to_str)
|
||||
# if we're merging a String with a Slice, the slice wins.
|
||||
return r if r.respond_to? :to_slice
|
||||
return l if l.respond_to? :to_slice
|
||||
|
||||
fail "NOTREACHED: What other stringlike classes are there?"
|
||||
end
|
||||
|
||||
# special case: If one of them is a string/slice, the other is more important
|
||||
return l if r.respond_to? :to_str
|
||||
return r if l.respond_to? :to_str
|
||||
|
||||
# otherwise just create an array for one of them to live in
|
||||
return l + [r] if r.class == Hash
|
||||
return [l] + r if l.class == Hash
|
||||
|
||||
fail "Unhandled case when foldr'ing sequence."
|
||||
end
|
||||
|
||||
# Flatten results from a repetition of a single parslet. named indicates
|
||||
# whether the user has named the result or not. If the user has named
|
||||
# the results, we want to leave an empty list alone - otherwise it is
|
||||
# turned into an empty string.
|
||||
#
|
||||
# @api private
|
||||
#
|
||||
def flatten_repetition(list, named)
|
||||
if list.any? { |e| e.instance_of?(Hash) }
|
||||
# If keyed subtrees are in the array, we'll want to discard all
|
||||
# strings inbetween. To keep them, name them.
|
||||
return list.select { |e| e.instance_of?(Hash) }
|
||||
end
|
||||
|
||||
if list.any? { |e| e.instance_of?(Array) }
|
||||
# If any arrays are nested in this array, flatten all arrays to this
|
||||
# level.
|
||||
return list.
|
||||
select { |e| e.instance_of?(Array) }.
|
||||
flatten(1)
|
||||
end
|
||||
|
||||
# Consistent handling of empty lists, when we act on a named result
|
||||
return [] if named && list.empty?
|
||||
|
||||
# If there are only strings, concatenate them and return that.
|
||||
foldl(list) { |s,e| s+e }
|
||||
end
|
||||
|
||||
# That annoying warning 'Duplicate subtrees while merging result' comes
|
||||
# from here. You should add more '.as(...)' names to your intermediary tree.
|
||||
#
|
||||
def warn_about_duplicate_keys(h1, h2)
|
||||
d = h1.keys & h2.keys
|
||||
unless d.empty?
|
||||
warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
|
||||
" of the latter will be kept. (keys: #{d.inspect})"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
38
lib/parslet/atoms/capture.rb
Normal file
38
lib/parslet/atoms/capture.rb
Normal file
@ -0,0 +1,38 @@
|
||||
|
||||
# Records what an atom matched in the parse context's #captures, so that
# later parts of the grammar can pull it out and match against it.
#
# Example:
#   # After this, context.captures[:an_a] returns 'a'
#   str('a').capture(:an_a)
#
#   # Capture and use of the capture: (matches either 'aa' or 'bb')
#   match['ab'].capture(:first) >>
#     dynamic { |src, ctx| str(ctx.captures[:first]) }
#
class Parslet::Atoms::Capture < Parslet::Atoms::Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet; on success the flattened value is stored
  # under the (symbolized) capture name. The wrapped parslet's result is
  # returned unchanged either way.
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    matched, value = outcome

    context.captures[name.to_sym] = flatten(value) if matched

    outcome
  end

  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end
|
||||
|
91
lib/parslet/atoms/context.rb
Normal file
91
lib/parslet/atoms/context.rb
Normal file
@ -0,0 +1,91 @@
|
||||
module Parslet::Atoms
|
||||
# Transient, per-parse state: a packrat-style memoization cache keyed by
# input position and parslet object, the error reporter that all error
# reporting is routed through (which makes reporting pluggable), and the
# current captures.
#
class Context
  # @param reporter [#err, #err_at] Error reporter (leave empty for default
  #   reporter)
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    @cache = Hash.new { |h, k| h[k] = {} }
    @reporter = reporter
    @captures = Parslet::Scope.new
  end

  # Applies +obj+ at the current position of +source+, answering from the
  # cache when the same parslet was already tried at that position.
  #
  # The cache stores the result together with the number of positions the
  # original attempt advanced the source, so a cache hit can move
  # source.pos by the same amount without re-reading the input.
  #
  def try_with_cache(obj, source, consume_all)
    start = source.pos
    cached = lookup(obj, start)

    if cached
      # Replay: hand out the stored result and skip what was read before.
      result, advance = cached
      source.pos = start + advance
      return result
    end

    result = obj.try(source, self, consume_all)
    set(obj, start, [result, source.pos - start]) if obj.cached?

    result
  end

  # Report an error at a given position. Returns a failure tuple whose
  # second slot is whatever the reporter produced (nil without reporter).
  # @see ErrorReporter
  #
  def err_at(*args)
    [false, @reporter ? @reporter.err_at(*args) : nil]
  end

  # Report an error. Returns a failure tuple whose second slot is whatever
  # the reporter produced (nil without reporter).
  # @see ErrorReporter
  #
  def err(*args)
    [false, @reporter ? @reporter.err(*args) : nil]
  end

  # Returns the current captures made on the input (see
  # Parslet::Atoms::Base#capture). Use as follows:
  #
  #   context.captures[:foobar] # => returns capture :foobar
  #
  attr_reader :captures

  # Runs the given block inside a new capture scope; the scope is popped
  # again even if the block exits early. Use the #scope method of
  # Parslet::Atoms::DSL to call this.
  #
  def scope
    captures.push
    yield
  ensure
    captures.pop
  end

private
  # Cache read for +obj+ at +pos+ (nil when nothing is stored).
  def lookup(obj, pos)
    @cache[pos][obj]
  end

  # Cache write for +obj+ at +pos+.
  def set(obj, pos, val)
    @cache[pos][obj] = val
  end
end
|
||||
end
|
109
lib/parslet/atoms/dsl.rb
Normal file
109
lib/parslet/atoms/dsl.rb
Normal file
@ -0,0 +1,109 @@
|
||||
|
||||
# Mixin with the combinators that can be called on any subclass of
# Parslet::Atoms::Base. Every method wraps the receiver in a new atom, which
# is what makes parslet atoms chainable into bigger parsers.
#
# Example:
#
#   str('foo') >> str('bar')
#   str('f').repeat
#   any.absent?               # also called The Epsilon
#
module Parslet::Atoms::DSL
  # Builds an atom that matches the receiver at least +min+ and at most
  # +max+ times. A +max+ of nil means there is no upper bound.
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Builds an atom that matches the receiver zero or one time. This is a
  # repetition with bounds (0, 1) that carries the :maybe tag.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Chains the receiver and +parslet+ together as a sequence.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Chains the receiver and +parslet+ together as an alternation, with the
  # receiver on the left and +parslet+ on the right.
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Tests for absence of the receiver in the input stream without consuming
  # it (negative lookahead).
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Tests for presence of the receiver in the input stream without
  # consuming it (positive lookahead).
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Alias for present? that will disappear in 2.0 (deprecated)
  #
  alias_method :prsnt?, :present?

  # Alias for absent? that will disappear in 2.0 (deprecated)
  #
  alias_method :absnt?, :absent?

  # Marks the receiver as important for the tree output by giving its
  # result a name. This must be used to achieve meaningful output from the
  # #parse method.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Captures a part of the input and stores it under the name given, which
  # is useful for self-referential parses. The (possibly complex) result is
  # stored on a successful parse.
  #
  # Example:
  #   str('a').capture(:b)  # will store captures[:b] == 'a'
  #
  def capture(name)
    Parslet::Atoms::Capture.new(self, name)
  end
end
|
32
lib/parslet/atoms/dynamic.rb
Normal file
32
lib/parslet/atoms/dynamic.rb
Normal file
@ -0,0 +1,32 @@
|
||||
# Evaluates a block at parse time. The block is called with the current
# source and parse context and must return a parser (something which
# implements #apply); that parser is then applied to the input, creating the
# result.
#
# Dynamic parses are never cached.
#
# Example:
#   dynamic { rand < 0.5 ? str('a') : str('b') }
#
class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
  attr_reader :block

  # @param block [#call] callable invoked as block.call(source, context)
  def initialize(block)
    # Run the base-class initializer like the other atom constructors do.
    super()

    @block = block
  end

  # The block may hand out a different parser on every call, so results of
  # this atom are never memoized.
  def cached?
    false
  end

  # Asks the block for a parser and applies it to the input.
  def try(source, context, consume_all)
    generated = block.call(source, context)

    # The block's result is a parslet atom.
    generated.apply(source, context, consume_all)
  end

  def to_s_inner(prec)
    "dynamic { ... }"
  end
end
|
||||
|
41
lib/parslet/atoms/entity.rb
Normal file
41
lib/parslet/atoms/entity.rb
Normal file
@ -0,0 +1,41 @@
|
||||
# Wraps a piece of parslet definition and gives it a name. The wrapped piece
# is evaluated lazily and then cached, which serves two purposes:
#
# * Avoid infinite recursion during evaluation of the definition
# * Be able to print things by their name, not by their sometimes
#   complicated content.
#
# You don't normally use this directly; instead you should generate it by
# using the structuring method Parslet.rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  def initialize(name, &block)
    super()

    @name, @block = name, block
  end

  def try(source, context, consume_all)
    parslet.apply(source, context, consume_all)
  end

  # The wrapped parslet. Built by calling the block on first use; a block
  # that yields nothing raises NotImplementedError instead of being cached.
  def parslet
    return @parslet if @parslet

    built = @block.call
    raise_not_implemented unless built
    @parslet = built
  end

  def to_s_inner(prec)
    name.to_s.upcase
  end

private
  # Raises NotImplementedError with a backtrace that leaves out the frames
  # from this file.
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    error = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    # blatantly stolen from dependencies.rb in activesupport
    error.set_backtrace(caller.reject { |frame| frame =~ this_file })

    raise error
  end
end
|
121
lib/parslet/atoms/infix.rb
Normal file
121
lib/parslet/atoms/infix.rb
Normal file
@ -0,0 +1,121 @@
|
||||
class Parslet::Atoms::Infix < Parslet::Atoms::Base
|
||||
attr_reader :element, :operations
|
||||
|
||||
# Stores the operand atom and the list of operations. Each operation entry
# is destructured elsewhere in this class as (atom, precedence,
# associativity).
def initialize(element, operations)
  super()

  @element, @operations = element, operations
end
|
||||
|
||||
# Parses an infix expression. On success the climbed expression is turned
# into a tree and wrapped in a success tuple; if the climb aborts, the error
# thrown via :error becomes the return value.
def try(source, context, consume_all)
  catch_error do
    climbed = precedence_climb(source, context, consume_all)
    return succ(produce_tree(climbed))
  end
end
|
||||
|
||||
# Turns an array of the form ['1', '+', ['2', '*', '3']] into a hash that
# reflects the same structure. Operators fold from the left, so
# ['1', '+', '2', '-', '3'] nests the '+' node inside the '-' node.
#
# The input array (and any nested array) is left unmodified.
#
# @param ary [Array, Object] alternating operand/operator list; anything
#   that is not an Array is returned as is
# @return [Hash, Object] nested hashes with the keys :l, :o and :r, or the
#   single operand if there is no operator
#
def produce_tree(ary)
  return ary unless ary.kind_of? Array

  left, *rest = ary

  # Consume (operator, right operand) pairs; array operands are
  # subexpressions and become subhashes through the recursion.
  rest.each_slice(2) do |op, right|
    left = {l: left, o: op, r: produce_tree(right)}
  end

  left
end
|
||||
|
||||
# A precedence climbing algorithm married to parslet, as described here
#   http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
#
# Returns the flat list [element, op, element-or-sublist, ...] for the
# operators whose precedence is at least +current_prec+, unwrapped to the
# bare element when no operator was consumed.
#
# @note Error handling in this routine is done by throwing :error and
#   as a value the error to return to parslet. This avoids cluttering
#   the recursion logic here with parslet error handling.
#
def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
  # An arithmetic expression needs at least one @element to begin with.
  success, value = @element.apply(source, context, false)
  unless success
    abort context.err(self, source, "#{@element.inspect} was expected", [value])
  end

  result = [flatten(value, true)]

  # Loop until we fail on operator matching or until input runs out.
  loop do
    op_pos = source.pos
    op_match, prec, assoc = match_operation(source, context, false)

    # No operator here means end of file, end of expression or a syntax
    # error; in all cases matching of this expression stops.
    break unless op_match

    unless prec >= current_prec
      # The operator belongs to an enclosing level: give it back.
      source.pos = op_pos
      break
    end

    next_prec = (assoc == :left) ? prec+1 : prec

    result << op_match
    result << precedence_climb(source, context, consume_all, next_prec, true)
  end

  unwrap(result)
end
|
||||
|
||||
# Returns the only element of a one-element list, any other list as is.
def unwrap(expr)
  return expr.first if expr.size == 1

  expr
end
|
||||
|
||||
# Tries the operator atoms in order and stops at the first one that matches.
#
# @return [Array, nil] the flattened operator match together with the
#   precedence and associativity of the matching entry, or nil if no
#   operator matches at the current position
def match_operation(source, context, consume_all)
  @operations.each do |op_atom, prec, assoc|
    success, value = op_atom.apply(source, context, consume_all)
    return flatten(value, true), prec, assoc if success
  end

  # None of the operators matched; their errors are not reported.
  nil
end
|
||||
|
||||
# Leaves the current parse attempt by throwing +error+ under the :error tag.
def abort(error)
  throw(:error, error)
end
|
||||
# Runs the block and returns its value, or the value thrown under :error.
def catch_error
  catch(:error) do
    yield
  end
end
|
||||
|
||||
# Describes the atom as infix_expression(element, [operator atoms]).
def to_s_inner(prec)
  op_list = @operations.map { |atom, *| atom.inspect }.join(', ')
  "infix_expression(#{@element.inspect}, [#{op_list}])"
end
|
||||
end
|
49
lib/parslet/atoms/lookahead.rb
Normal file
49
lib/parslet/atoms/lookahead.rb
Normal file
@ -0,0 +1,49 @@
|
||||
# Positive or negative lookahead. Never consumes input.
#
# Example:
#
#   str('foo').present? # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  def initialize(bound_parslet, positive=true)
    super()

    # Positive and negative lookahead are modelled by this one flag.
    @positive = positive
    @bound_parslet = bound_parslet

    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Succeeds (with a nil value) when the bound parslet matches for a
  # positive lookahead, or fails to match for a negative one. Otherwise an
  # error is reported at the position where the lookahead started.
  def try(source, context, consume_all)
    pos = source.pos

    success, _value = bound_parslet.apply(source, context, consume_all)

    satisfied = positive ? success : !success
    return succ(nil) if satisfied

    kind = positive ? :positive : :negative
    return context.err_at(self, source, @error_msgs[kind], pos)

  # This is probably the only parslet that rewinds its input in #try.
  # Lookaheads NEVER consume their input, even on success, that's why.
  ensure
    source.pos = pos
  end

  precedence LOOKAHEAD
  def to_s_inner(prec)
    marker = positive ? '&' : '!'

    "#{marker}#{bound_parslet.to_s(prec)}"
  end
end
|
32
lib/parslet/atoms/named.rb
Normal file
32
lib/parslet/atoms/named.rb
Normal file
@ -0,0 +1,32 @@
|
||||
# Gives a match a name, which influences tree construction.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. A failure is passed through untouched; a
  # success is rewrapped as { name => flattened value }.
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    matched, value = outcome

    matched ? succ(produce_return_value(value)) : outcome
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

private
  # Flattens +val+ in named mode and files it under this atom's name.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
|
38
lib/parslet/atoms/re.rb
Normal file
38
lib/parslet/atoms/re.rb
Normal file
@ -0,0 +1,38 @@
|
||||
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: <code>character
# ranges, \\w, \\d, \\r, \\n, ...</code>
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re

  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(self.match, Regexp::MULTILINE)
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Failed to match #{match.inspect[1..-2]}"
    }
  end

  # Consumes exactly one character when the source matches the expression
  # at the current position. Otherwise reports either premature end of
  # input (nothing left to read) or a failed match.
  def try(source, context, consume_all)
    return succ(source.consume(1)) if source.matches?(@re)

    reason = source.chars_left < 1 ? :premature : :failed
    context.err(self, source, @error_msgs[reason])
  end

  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
|
||||
|
83
lib/parslet/atoms/repetition.rb
Normal file
83
lib/parslet/atoms/repetition.rb
Normal file
@ -0,0 +1,83 @@
|
||||
|
||||
# Matches a parslet repeatedly.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # @raise [ArgumentError] if +max+ is 0
  def initialize(parslet, min, max, tag=:repetition)
    super()

    if max == 0
      raise ArgumentError,
        "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})"
    end

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}",
      :unconsumed => "Extra input after last repetition"
    }
  end

  def try(source, context, consume_all)
    matches = 0
    collected = [@tag]      # the tag comes first (needed for flattening)
    start_pos = source.pos

    last_failure = nil
    loop do
      success, value = parslet.apply(source, context, false)

      last_failure = value
      break unless success

      matches += 1
      collected << value

      # With an upper bound, stop as soon as it has been reached.
      return succ(collected) if max && matches >= max
    end

    # The last attempt to match parslet failed; the reason is in
    # last_failure. Too few successes make the whole repetition fail.
    if matches < min
      return context.err_at(
        self, source, @error_msgs[:minrep], start_pos, [last_failure])
    end

    # consume_all means this repetition should have eaten the rest of the
    # input. If it did not, report that explicitly so that last_failure,
    # which often holds the interesting information, is not thrown away.
    if consume_all && source.chars_left > 0
      return context.err(
        self, source, @error_msgs[:unconsumed], [last_failure])
    end

    succ(collected)
  end

  precedence REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end
end
|
||||
|
26
lib/parslet/atoms/scope.rb
Normal file
26
lib/parslet/atoms/scope.rb
Normal file
@ -0,0 +1,26 @@
|
||||
# Starts a new scope in the parsing process. Please also see the #captures
# method.
#
class Parslet::Atoms::Scope < Parslet::Atoms::Base
  attr_reader :block

  def initialize(block)
    super()

    @block = block
  end

  def cached?
    false
  end

  # Opens a scope on the context, asks the block for a parslet and applies
  # that parslet inside the scope.
  def apply(source, context, consume_all)
    context.scope do
      return block.call.apply(source, context, consume_all)
    end
  end

  def to_s_inner(prec)
    "scope { #{block.call.to_s(prec)} }"
  end
end
|
45
lib/parslet/atoms/sequence.rb
Normal file
45
lib/parslet/atoms/sequence.rb
Normal file
@ -0,0 +1,45 @@
|
||||
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
# Example:
#
#   str('a') >> str('b')  # matches 'a', then 'b'
#
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  def initialize(*parslets)
    super()

    @parslets = parslets
    @error_msgs = {
      :failed => "Failed to match sequence (#{self.inspect})"
    }
  end

  # Appending to a sequence yields one longer sequence rather than a
  # sequence nested in a sequence.
  def >>(parslet)
    self.class.new(*@parslets, parslet)
  end

  # Applies every child in order and collects the values behind the
  # :sequence tag. The first failing child fails the whole sequence.
  def try(source, context, consume_all)
    final_index = parslets.size - 1
    collected = [:sequence]

    parslets.each_with_index do |atom, index|
      # Only the last child may be asked to consume all remaining input.
      child_consume_all = consume_all && (index == final_index)
      success, value = atom.apply(source, context, child_consume_all)

      return context.err(self, source, @error_msgs[:failed], [value]) unless success

      collected << value
    end

    succ(collected)
  end

  precedence SEQUENCE
  def to_s_inner(prec)
    parslets.map { |atom| atom.to_s(prec) }.join(' ')
  end
end
|
39
lib/parslet/atoms/str.rb
Normal file
39
lib/parslet/atoms/str.rb
Normal file
@ -0,0 +1,39 @@
|
||||
# Matches a string of characters.
#
# Example:
#
#   str('foo') # matches 'foo'
#
class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str

  # @param str [#to_s] the text to match; it is converted with #to_s, and
  #   the pattern and the length are derived from the converted string
  def initialize(str)
    super()

    @str = str.to_s
    # Derive pattern and length from the converted string so that they
    # always describe the text that #str reports.
    @pat = Regexp.new(Regexp.escape(@str))
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Expected #{str.inspect}, but got "
    }
  end

  # Consumes the string if the source matches it at the current position.
  # Otherwise reports either premature end of input or a mismatch that
  # quotes the input found instead.
  def try(source, context, consume_all)
    return succ(source.consume(@len)) if source.matches?(@pat)

    # Input ending early:
    return context.err(self, source, @error_msgs[:premature]) \
      if source.chars_left<@len

    # Expected something, but got something else instead:
    error_pos = source.pos
    return context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  def to_s_inner(prec)
    "'#{str}'"
  end
end
|
||||
|
89
lib/parslet/atoms/visitor.rb
Normal file
89
lib/parslet/atoms/visitor.rb
Normal file
@ -0,0 +1,89 @@
|
||||
# Augments the parslet atoms with an #accept method that calls back to the
# visitor given, passing the atom's parts.
#
module Parslet::Atoms
  class Base
    # Atoms without a specialised #accept cannot be visited.
    def accept(visitor)
      fail NotImplementedError, "No #accept method on #{self.class.name}."
    end
  end

  class Str
    # Calls back the visitor's #visit_str method. See parslet/export for an
    # example.
    #
    def accept(visitor)
      visitor.visit_str(str)
    end
  end

  class Entity
    # Calls back the visitor's #visit_entity method. See parslet/export for
    # an example.
    #
    def accept(visitor)
      visitor.visit_entity(name, block)
    end
  end

  class Named
    # Calls back the visitor's #visit_named method. See parslet/export for
    # an example.
    #
    def accept(visitor)
      visitor.visit_named(name, parslet)
    end
  end

  class Sequence
    # Calls back the visitor's #visit_sequence method. See parslet/export
    # for an example.
    #
    def accept(visitor)
      visitor.visit_sequence(parslets)
    end
  end

  class Repetition
    # Calls back the visitor's #visit_repetition method. See parslet/export
    # for an example.
    #
    def accept(visitor)
      visitor.visit_repetition(@tag, min, max, parslet)
    end
  end

  class Alternative
    # Calls back the visitor's #visit_alternative method. See
    # parslet/export for an example.
    #
    def accept(visitor)
      visitor.visit_alternative(alternatives)
    end
  end

  class Lookahead
    # Calls back the visitor's #visit_lookahead method. See parslet/export
    # for an example.
    #
    def accept(visitor)
      visitor.visit_lookahead(positive, bound_parslet)
    end
  end

  class Re
    # Calls back the visitor's #visit_re method. See parslet/export for an
    # example.
    #
    def accept(visitor)
      visitor.visit_re(match)
    end
  end
end
|
||||
|
||||
class Parslet::Parser
  # Calls back the visitor's #visit_parser method with the root of this
  # parser.
  #
  def accept(visitor)
    visitor.visit_parser(root)
  end
end
|
94
lib/parslet/cause.rb
Normal file
94
lib/parslet/cause.rb
Normal file
@ -0,0 +1,94 @@
|
||||
module Parslet
|
||||
# Represents a cause why a parse did fail. A lot of these objects are
|
||||
# constructed - not all of the causes turn out to be failures for the whole
|
||||
# parse.
|
||||
#
|
||||
class Cause
|
||||
def initialize(message, source, pos, children)
|
||||
@message, @source, @pos, @children =
|
||||
message, source, pos, children
|
||||
end
|
||||
|
||||
# @return [String, Array] A string or an array of message pieces that
|
||||
# provide failure information. Use #to_s to get a formatted string.
|
||||
attr_reader :message
|
||||
|
||||
# @return [Parslet::Source] Source that was parsed when this error
|
||||
# happend. Mainly used for line number information.
|
||||
attr_reader :source
|
||||
|
||||
# Location of the error.
|
||||
#
|
||||
# @return [Fixnum] Position where the error happened. (character offset)
|
||||
attr_reader :pos
|
||||
|
||||
# When this cause is part of a tree of error causes: child nodes for this
|
||||
# node. Very often carries the reasons for this cause.
|
||||
#
|
||||
# @return [Array<Parslet::Cause>] A list of reasons for this cause.
|
||||
def children
|
||||
@children ||= []
|
||||
end
|
||||
|
||||
# Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
|
||||
# override the position of the +source+. This method returns an object
|
||||
# that can be turned into a string using #to_s.
|
||||
#
|
||||
# @param source [Parslet::Source] source that was parsed when this error
|
||||
# happened
|
||||
# @param pos [Fixnum] position of error
|
||||
# @param str [String, Array<String>] message parts
|
||||
# @param children [Array<Parslet::Cause>] child nodes for this error tree
|
||||
# @return [Parslet::Cause] a new instance of {Parslet::Cause}
|
||||
#
|
||||
def self.format(source, pos, str, children=[])
|
||||
self.new(str, source, pos, children)
|
||||
end
|
||||
|
||||
def to_s
|
||||
line, column = source.line_and_column(pos)
|
||||
# Allow message to be a list of objects. Join them here, since we now
|
||||
# really need it.
|
||||
Array(message).map { |o|
|
||||
o.respond_to?(:to_slice) ?
|
||||
o.str.inspect :
|
||||
o.to_s }.join + " at line #{line} char #{column}."
|
||||
end
|
||||
|
||||
# Signals to the outside that the parse has failed. Use this in
|
||||
# conjunction with .format for nice error messages.
|
||||
#
|
||||
def raise(exception_klass=Parslet::ParseFailed)
|
||||
exception = exception_klass.new(self.to_s, self)
|
||||
Kernel.raise exception
|
||||
end
|
||||
|
||||
# Returns an ascii tree representation of the causes of this node and its
|
||||
# children.
|
||||
#
|
||||
def ascii_tree
|
||||
StringIO.new.tap { |io|
|
||||
recursive_ascii_tree(self, io, [true]) }.
|
||||
string
|
||||
end
|
||||
|
||||
private
|
||||
def recursive_ascii_tree(node, stream, curved)
|
||||
append_prefix(stream, curved)
|
||||
stream.puts node.to_s
|
||||
|
||||
node.children.each do |child|
|
||||
last_child = (node.children.last == child)
|
||||
|
||||
recursive_ascii_tree(child, stream, curved + [last_child])
|
||||
end
|
||||
end
|
||||
def append_prefix(stream, curved)
  # The root (a single flag) gets no prefix at all.
  return if curved.size < 2

  # Flags between the root and the current node decide whether a vertical
  # bar is still needed in that column (the ancestor has further siblings)
  # or not.
  # NOTE(review): the filler segments are padded to three columns so they
  # line up with the three-character connectors printed below; the narrower
  # literals seen in this copy looked like collapsed whitespace - confirm.
  *ancestors, current = curved.drop(1)
  ancestors.each do |was_last|
    stream.print(was_last ? "   " : "|  ")
  end

  stream.print(current ? "`- " : "|- ")
end
|
||||
end
|
||||
end
|
33
lib/parslet/context.rb
Normal file
33
lib/parslet/context.rb
Normal file
@ -0,0 +1,33 @@
|
||||
require 'blankslate'
|
||||
|
||||
# Provides a context for tree transformations to run in. The context allows
|
||||
# accessing each of the bindings in the bindings hash as local method.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# ctx = Context.new(:a => :b)
|
||||
# ctx.instance_eval do
|
||||
# a # => :b
|
||||
# end
|
||||
#
|
||||
# @api private
|
||||
class Parslet::Context < BlankSlate
  # Methods made visible again on top of the BlankSlate base class.
  reveal :methods
  reveal :respond_to?
  reveal :inspect
  reveal :to_s
  reveal :instance_variable_set

  # Defines a method named +name+ on this one object only (on its
  # singleton class), using +body+ as the implementation.
  def meta_def(name, &body)
    metaclass = class << self; self; end

    metaclass.send(:define_method, name, &body)
  end

  # @param bindings [Hash] name => value pairs; each pair becomes both a
  #   reader method and an instance variable of the same name.
  def initialize(bindings)
    bindings.each do |key, value|
      meta_def(key.to_sym) { value }
      instance_variable_set("@#{key}", value)
    end
  end
end
|
33
lib/parslet/convenience.rb
Normal file
33
lib/parslet/convenience.rb
Normal file
@ -0,0 +1,33 @@
|
||||
class Parslet::Atoms::Base
  # Packages the common idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parslet::ParseFailed => error
  #     puts error.cause.ascii_tree
  #   end
  #
  # into a convenient method.
  #
  # Usage:
  #
  #   require 'parslet'
  #   require 'parslet/convenience'
  #
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # @param str input handed to #parse
  # @param opts [Hash] options handed to #parse unchanged
  # @return the result of #parse; when the parse fails the error tree is
  #   printed to standard output and the result of +puts+ (nil) is returned
  #
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts={})
    parse str, opts
  rescue Parslet::ParseFailed => error
    puts error.cause.ascii_tree
  end
end
|
7
lib/parslet/error_reporter.rb
Normal file
7
lib/parslet/error_reporter.rb
Normal file
@ -0,0 +1,7 @@
|
||||
# A namespace for all error reporters.
|
||||
#
|
||||
module Parslet::ErrorReporter
  # Intentionally empty: the module only serves as a namespace here.
end
|
||||
|
||||
require 'parslet/error_reporter/tree'
|
||||
require 'parslet/error_reporter/deepest'
|
95
lib/parslet/error_reporter/deepest.rb
Normal file
95
lib/parslet/error_reporter/deepest.rb
Normal file
@ -0,0 +1,95 @@
|
||||
module Parslet
  module ErrorReporter
    # Instead of reporting the latest error that happens like {Tree} does,
    # this class reports the deepest error. Depth is defined here as how
    # advanced into the input an error happens. The errors close to the
    # greatest depth tend to be more relevant to the end user, since they
    # specify what could be done to make them go away.
    #
    # More specifically, errors produced by this reporter won't be related to
    # the structure of the grammar at all. The positions of the errors will
    # be advanced and convey at every grammar level what the deepest rule
    # was to fail.
    #
    class Deepest
      # Returns the cause that is currently deepest. Mainly for specs.
      #
      attr_reader :deepest_cause

      def initialize
        @deepest_cause = nil
      end

      # Produces an error cause that combines the message at the current level
      # with the errors that happened at a level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        deepest(Cause.format(source, source.pos, message, children))
      end

      # Produces an error cause that combines the message at the current level
      # with the errors that happened at a level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # Looks up the most deeply nested leaf below +cause+ and compares its
      # position with the deepest cause stored so far. If the leaf is at the
      # same position or further into the input, the leaf is stored as the
      # new reference and +cause+ is passed through to the caller. Otherwise
      # the stored deepest cause is returned instead.
      #
      def deepest(cause)
        _rank, leaf = deepest_child(cause)

        if !deepest_cause || leaf.pos >= deepest_cause.pos
          # This error reaches deeper into the input, save it as reference.
          @deepest_cause = leaf
          return cause
        end

        deepest_cause
      end

      private

      # Returns [rank, leaf] for the leaf of the given error tree that has
      # the biggest rank, where rank is the nesting level below the cause the
      # search started from. On ties the first leaf found wins.
      #
      def deepest_child(cause, rank=0)
        max_child = cause
        max_rank = rank

        kids = cause.children
        if kids && !kids.empty?
          kids.each do |child|
            c_rank, c_cause = deepest_child(child, rank + 1)

            if c_rank > max_rank
              max_rank = c_rank
              max_child = c_cause
            end
          end
        end

        return max_rank, max_child
      end
    end
  end
end
|
57
lib/parslet/error_reporter/tree.rb
Normal file
57
lib/parslet/error_reporter/tree.rb
Normal file
@ -0,0 +1,57 @@
|
||||
module Parslet
  module ErrorReporter
    # An error reporter has two central methods, one for reporting errors at
    # the current parse position (#err) and one for reporting errors at a
    # given parse position (#err_at). The reporter can return an object (a
    # 'cause') that will be returned to the caller along with the information
    # that the parse failed.
    #
    # When reporting errors on the outer levels of your parser, these methods
    # get passed a list of error objects ('causes') from the inner levels. In
    # this default implementation, the inner levels are considered error
    # subtrees and are appended to the generated tree node at each level,
    # thereby constructing an error tree.
    #
    # This error tree will report in parallel with the grammar structure that
    # failed. A one-to-one correspondence exists between each error in the
    # tree and the parslet atom that produced that error.
    #
    # The implementor is really free to use these return values as he sees
    # fit. One example would be to return an error state object from these
    # methods that is then updated as errors cascade up the parse derivation
    # tree.
    #
    class Tree
      # Produces an error cause that combines the message at the current level
      # with the errors that happened at a level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        # The error is located at the source's current position.
        Cause.format(source, source.pos, message, children)
      end

      # Produces an error cause that combines the message at the current level
      # with the errors that happened at a level below (children).
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
end
|
162
lib/parslet/export.rb
Normal file
162
lib/parslet/export.rb
Normal file
@ -0,0 +1,162 @@
|
||||
# Allows exporting parslet grammars to other lingos.
|
||||
|
||||
require 'set'
|
||||
require 'parslet/atoms/visitor'
|
||||
|
||||
class Parslet::Parser
  module Visitors
    # Turns the atoms it visits into fragments of a Citrus grammar. Every
    # visit method returns a freshly built string (except #visit_re, which
    # hands back the pattern as is), so the visited grammar is never
    # modified.
    #
    class Citrus
      attr_reader :context, :output

      # @param context object that answers #deferred and #mangle_name (the
      #   PrettyPrinter below)
      def initialize(context)
        @context = context
      end

      # Re-quotes the string using the escaping of String#inspect.
      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end

      def visit_re(match)
        match.to_s
      end

      # Entities are not expanded in place: the context is asked to print
      # the rule later and only a reference to its name is emitted here.
      def visit_entity(name, block)
        context.deferred(name, block)

        "(#{context.mangle_name(name)})"
      end

      # Names carry no meaning in the exported grammar; only the wrapped
      # parslet is rendered.
      def visit_named(name, parslet)
        parslet.accept(self)
      end

      def visit_sequence(parslets)
        "(#{parslets.map { |el| el.accept(self) }.join(' ')})"
      end

      def visit_repetition(tag, min, max, parslet)
        # `+` instead of `<<`: the string returned by #accept may be the
        # atom's own pattern (see #visit_re) and must not be appended to.
        parslet.accept(self) + "#{min}*#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' | ')})"
      end

      def visit_lookahead(positive, bound_parslet)
        (positive ? '&' : '!') + bound_parslet.accept(self)
      end
    end

    # Same as Citrus, except for the notation of repetitions and
    # alternatives.
    #
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        parslet.accept(self) + "#{min}..#{max}"
      end

      def visit_alternative(alternatives)
        "(#{alternatives.map { |el| el.accept(self) }.join(' / ')})"
      end
    end
  end

  # A helper class that formats Citrus and Treetop grammars as a string.
  #
  class PrettyPrinter
    attr_reader :visitor

    # @param visitor_klass [Class] visitor class to render with; it is
    #   instantiated with this printer as its context
    def initialize(visitor_klass)
      @visitor = visitor_klass.new(self)
      # Rules waiting to be printed; starts out empty so that grammars
      # without any rule reference can be printed as well.
      @todo = []
    end

    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    def pretty_print(name, parslet)
      output = "grammar #{name}\n"

      output << rule('root', parslet)

      seen = Set.new
      # @todo is constantly filled by the visitor (see #deferred). We
      # keep going until it is empty.
      until @todo.empty?
        rule_name, block = @todo.shift

        # Track what rules we've already seen. This breaks loops.
        next unless seen.add?(rule_name)

        output << rule(rule_name, block.call)
      end

      output << "end\n"
    end

    # Formats a rule in either dialect.
    #
    # NOTE(review): the indentation inside these literals is reproduced as
    # it appears in this copy; runs of spaces may have been collapsed -
    # confirm against the upstream file.
    def rule(name, parslet)
      " rule #{mangle_name name}\n" <<
        " " << parslet.accept(visitor) << "\n" <<
        " end\n"
    end

    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      @todo ||= []
      @todo << [name, content]
    end

    # Mangles names so that Citrus and Treetop can live with it. This mostly
    # transforms some of the things that Ruby allows into other patterns. If
    # there is collision, we will not detect it for now.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end

  # Exports the current parser instance as a string in the Citrus dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    PrettyPrinter.new(Visitors::Citrus).
      pretty_print(self.class.name, root)
  end

  # Exports the current parser instance as a string in the Treetop dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    PrettyPrinter.new(Visitors::Treetop).
      pretty_print(self.class.name, root)
  end
end
|
||||
|
51
lib/parslet/expression.rb
Normal file
51
lib/parslet/expression.rb
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
# Allows specifying rules as strings using the exact same grammar that treetop
|
||||
# does, minus the actions. This is on one hand a good example of a fully
|
||||
# fledged parser and on the other hand might even turn out really useful.
|
||||
#
|
||||
# This can be viewed as an extension to parslet and might even be hosted in
|
||||
# its own gem one fine day.
|
||||
#
|
||||
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Creates a parslet from a foreign language expression.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  # @param str [String] the expression source; it is parsed and transformed
  #   right away
  # @param opts [Hash] only :type is read (default :treetop); the value is
  #   stored but not consulted anywhere in this class
  # @param context not used by this class
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str
    @parslet = transform(parse(str))
  end

  # Transforms the parse tree into a parslet expression.
  #
  # When the transformation raises, the offending tree is written out with
  # +warn+ and the original error is re-raised.
  #
  def transform(tree)
    tree_transform = Treetop::Transform.new

    # pp tree
    tree_transform.apply(tree)
  rescue
    warn "Could not transform: " + tree.inspect
    raise
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet.
  #
  def to_parslet
    @parslet
  end
end
|
92
lib/parslet/expression/treetop.rb
Normal file
92
lib/parslet/expression/treetop.rb
Normal file
@ -0,0 +1,92 @@
|
||||
class Parslet::Expression::Treetop
  # Grammar for treetop-style expressions (without actions).
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) { alternatives }

    # alternative 'a' / 'b'
    rule(:alternatives) {
      (simple >> (spaced('/') >> simple).repeat).as(:alt)
    }

    # sequence by simple concatenation 'a' 'b'
    rule(:simple) { occurrence.repeat(1).as(:seq) }

    # occurrence modifiers
    rule(:occurrence) {
      atom.as(:repetition) >> spaced('*').as(:sign) |
        atom.as(:repetition) >> spaced('+').as(:sign) |
        atom.as(:repetition) >> repetition_spec |
        atom.as(:maybe) >> spaced('?') |
        atom
    }

    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
        dot |
        string |
        char_class
    }

    # a character class
    rule(:char_class) {
      (str('[') >>
        (str('\\') >> any |
         str(']').absent? >> any).repeat(1) >>
        str(']')).as(:match) >> space?
    }

    # anything at all
    rule(:dot) { spaced('.').as(:any) }

    # recognizing strings
    rule(:string) {
      str('\'') >>
        (
          (str('\\') >> any) |
          (str("'").absent? >> any)
        ).repeat.as(:string) >>
        str('\'') >> space?
    }

    # repetition specification like {1, 2}
    rule(:repetition_spec) {
      spaced('{') >>
        integer.maybe.as(:min) >> spaced(',') >>
        integer.maybe.as(:max) >> spaced('}')
    }
    rule(:integer) {
      match['0-9'].repeat(1)
    }

    # whitespace handling
    #
    # The pattern is written in single quotes on purpose: in a double-quoted
    # Ruby string "\s" is just a space character, which would leave tabs and
    # newlines unrecognised as whitespace.
    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    # Matches +str+ followed by optional whitespace.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms.
  class Transform < Parslet::Transform
    rule(:repetition => simple(:rep), :sign => simple(:sign)) {
      # The captured sign includes any whitespace that followed it (it comes
      # from #spaced), so it is stripped before the comparison.
      min = sign.to_s.strip == '+' ? 1 : 0
      Parslet::Atoms::Repetition.new(rep, min, nil) }
    rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) {
      Parslet::Atoms::Repetition.new(rep,
        Integer(min || 0),
        max && Integer(max) || nil) }

    rule(:alt => subtree(:alt)) { Parslet::Atoms::Alternative.new(*alt) }
    rule(:seq => sequence(:s)) { Parslet::Atoms::Sequence.new(*s) }
    rule(:unwrap => simple(:u)) { u }
    rule(:maybe => simple(:m)) { |d| d[:m].maybe }
    rule(:string => simple(:s)) { Parslet::Atoms::Str.new(s) }
    rule(:match => simple(:m)) { Parslet::Atoms::Re.new(m) }
    rule(:any => simple(:a)) { Parslet::Atoms::Re.new('.') }
  end
end
|
||||
|
97
lib/parslet/graphviz.rb
Normal file
97
lib/parslet/graphviz.rb
Normal file
@ -0,0 +1,97 @@
|
||||
|
||||
# Paints a graphviz graph of your parser.
|
||||
|
||||
begin
|
||||
require 'ruby-graphviz'
|
||||
rescue LoadError
|
||||
puts "Please install the 'ruby-graphviz' gem first."
|
||||
fail
|
||||
end
|
||||
|
||||
require 'set'
|
||||
require 'parslet/atoms/visitor'
|
||||
|
||||
module Parslet
  # Walks a grammar and records, in the graph object it was given, one node
  # per rule (entity) and one edge from each rule to every rule it refers to.
  class GraphvizVisitor
    # @param g graph object; it must answer #add_nodes(name, opts) and
    #   #add_edges(from, to)
    def initialize(g)
      @graph = g
      @known_links = Set.new
      @visited = Set.new
    end

    # The graph node new edges currently originate from.
    attr_reader :parent

    def visit_parser(root)
      recurse root, node('parser')
    end

    def visit_entity(name, block)
      s = node(name)

      downwards s

      # Expand every rule only once; this also breaks recursive grammars.
      return if @visited.include?(name)
      @visited << name

      recurse block.call, s
    end

    def visit_named(name, atom)
      recurse atom, parent
    end

    def visit_repetition(tag, min, max, atom)
      recurse atom, parent
    end

    def visit_alternative(alternatives)
      # Remember the parent first: visiting a branch may move @parent.
      p = parent
      alternatives.each do |atom|
        recurse atom, p
      end
    end

    def visit_sequence(sequence)
      # Remember the parent first: visiting an element may move @parent.
      p = parent
      sequence.each do |atom|
        recurse atom, p
      end
    end

    def visit_lookahead(positive, atom)
      recurse atom, parent
    end

    # Regular expression atoms are currently not drawn.
    def visit_re(regexp)
      # downwards node(regexp.object_id, label: escape("re(#{regexp.inspect})"))
    end

    # String atoms are currently not drawn.
    def visit_str(str)
      # downwards node(str.object_id, label: escape("#{str.inspect}"))
    end

    # Replaces double quotes by single quotes.
    def escape(str)
      str.gsub('"', "'")
    end

    # Adds a node called +name+ (converted to a string) to the graph and
    # returns whatever the graph returns for it.
    def node(name, opts={})
      @graph.add_nodes name.to_s, opts
    end

    # Draws an edge from the current parent to +child+, unless there is no
    # parent yet or the very same edge was drawn before.
    def downwards(child)
      if @parent && !@known_links.include?([@parent, child])
        @graph.add_edges(@parent, child)
        @known_links << [@parent, child]
      end
    end

    # Visits +node+ with +current+ as the parent of anything it draws.
    def recurse(node, current)
      @parent = current
      node.accept(self)
    end
  end

  module Graphable
    # Builds a digraph of the grammar of a new instance of the extended
    # class and hands +opts+ to the graph's #output.
    def graph(opts)
      g = GraphViz.new(:G, type: :digraph)
      visitor = GraphvizVisitor.new(g)

      new.accept(visitor)

      g.output opts
    end
  end

  class Parser # reopen for introducing the .graph method
    extend Graphable
  end
end
|
67
lib/parslet/parser.rb
Normal file
67
lib/parslet/parser.rb
Normal file
@ -0,0 +1,67 @@
|
||||
|
||||
# The base class for all your parsers. Use as follows:
|
||||
#
|
||||
# require 'parslet'
|
||||
#
|
||||
# class MyParser < Parslet::Parser
|
||||
# rule(:a) { str('a').repeat }
|
||||
# root(:a)
|
||||
# end
|
||||
#
|
||||
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
||||
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
||||
# # Don't know what to do with bbbb at line 1 char 1.
|
||||
#
|
||||
# Parslet::Parser is also a grammar atom. This means that you can mix full
|
||||
# fledged parsers freely with small parts of a different parser.
|
||||
#
|
||||
# Example:
|
||||
# class ParserA < Parslet::Parser
|
||||
# root :aaa
|
||||
# rule(:aaa) { str('a').repeat(3,3) }
|
||||
# end
|
||||
# class ParserB < Parslet::Parser
|
||||
# root :expression
|
||||
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
|
||||
# end
|
||||
#
|
||||
# In the above example, ParserB would parse something like 'baaab'.
|
||||
#
|
||||
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  class << self # class methods
    # Define the parsers #root function. This is the place where you start
    # parsing; if you have a rule for 'file' that describes what should be
    # in a file, this would be your root declaration:
    #
    #   class Parser
    #     root :file
    #     rule(:file) { ... }
    #   end
    #
    # #root declares a 'parse' function that works just like the parse
    # function that you can call on a simple parslet, taking a string as input
    # and producing parse output.
    #
    # In a way, #root is a shorthand for:
    #
    #   def parse(str)
    #     your_parser_root.parse(str)
    #   end
    #
    # @param name [Symbol] name of the method that returns the root parslet
    #
    def root(name)
      define_method(:root) do
        send(name)
      end
    end
  end

  # Delegates to the root parslet, which makes a whole parser usable as an
  # atom inside another grammar.
  def try(source, context, consume_all)
    root.try(source, context, consume_all)
  end

  # Delegates the string representation to the root parslet.
  def to_s_inner(prec)
    root.to_s(prec)
  end
end
|
114
lib/parslet/pattern.rb
Normal file
114
lib/parslet/pattern.rb
Normal file
@ -0,0 +1,114 @@
|
||||
# Matches trees against expressions. Trees are formed by arrays and hashes
|
||||
# for expressing membership and sequence. The leafs of the tree are other
|
||||
# classes.
|
||||
#
|
||||
# A tree issued by the parslet library might look like this:
|
||||
#
|
||||
# {
|
||||
# :function_call => {
|
||||
# :name => 'foobar',
|
||||
# :args => [1, 2, 3]
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# A pattern that would match against this tree would be:
|
||||
#
|
||||
# { :function_call => { :name => simple(:name), :args => sequence(:args) }}
|
||||
#
|
||||
# Note that Parslet::Pattern only matches at a given subtree; it wont try
|
||||
# to match recursively. To do that, please use Parslet::Transform.
|
||||
#
|
||||
class Parslet::Pattern
  # @param pattern the expression trees are matched against: a nesting of
  #   hashes, arrays, bind placeholders and plain values
  def initialize(pattern)
    @pattern = pattern
  end

  # Decides if the given subtree matches this pattern. Returns the bindings
  # made on a successful match or nil if the match fails. If you specify
  # bindings to be a hash, the mappings in it will be treated like bindings
  # made during an attempted match.
  #
  #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
  #
  # @param subtree [String, Hash, Array] poro subtree returned by a parse
  # @param bindings [Hash] variable bindings to be verified
  # @return [Hash, nil] On success: variable bindings that allow a match. On
  #   failure: nil
  #
  def match(subtree, bindings=nil)
    # Work on a copy so the caller's hash is never modified.
    bindings = bindings && bindings.dup || Hash.new
    return bindings if element_match(subtree, @pattern, bindings)
  end

  # Returns true if the tree element given by +tree+ matches the expression
  # given by +exp+. This match must respect bindings already made in
  # +bindings+. Note that bindings is carried along and modified.
  #
  # @api private
  #
  def element_match(tree, exp, bindings)
    # p [:elm, tree, exp]
    case [tree.class, exp.class]
    when [Hash, Hash]
      element_match_hash(tree, exp, bindings)
    when [Array, Array]
      element_match_ary_single(tree, exp, bindings)
    else
      # If elements match exactly, then that is good enough in all cases
      return true if exp === tree

      # If exp is a bind variable: Check if the binding matches
      if exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
        return element_match_binding(tree, exp, bindings)
      end

      # Otherwise: No match (we don't know anything about the element
      # combination)
      false
    end
  end

  # @api private
  #
  def element_match_binding(tree, exp, bindings)
    var_name = exp.variable_name

    # TODO test for the hidden :_ feature.
    #
    # A variable counts as bound as soon as it has an entry, whatever the
    # value: testing the value itself would treat a variable bound to nil or
    # false as unbound and silently rebind it.
    if var_name && bindings.key?(var_name)
      return bindings[var_name] == tree
    end

    # New binding:
    bindings.store var_name, tree

    true
  end

  # @api private
  #
  def element_match_ary_single(sequence, exp, bindings)
    return false if sequence.size != exp.size

    sequence.zip(exp).all? { |elt, subexp|
      element_match(elt, subexp, bindings) }
  end

  # @api private
  #
  def element_match_hash(tree, exp, bindings)
    # Early failure when one hash is bigger than the other
    return false unless exp.size == tree.size

    # We iterate over expected pattern, since we demand that the keys that
    # are there should be in tree as well.
    exp.each do |expected_key, expected_value|
      return false unless tree.has_key? expected_key

      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)
    end

    true
  end
end
|
49
lib/parslet/pattern/binding.rb
Normal file
49
lib/parslet/pattern/binding.rb
Normal file
@ -0,0 +1,49 @@
|
||||
|
||||
# Used internally for representing a bind placeholder in a Parslet::Transform
|
||||
# pattern. This is the superclass for all bindings.
|
||||
#
|
||||
# It defines the most permissive kind of bind, the one that matches any subtree
|
||||
# whatever it looks like.
|
||||
#
|
||||
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # Name under which the bound value is stored.
  def variable_name
    symbol
  end

  # Shown as <bind type>(<symbol>), for example subtree(:x).
  def inspect
    "#{bind_type_name}(#{symbol.inspect})"
  end

  # A subtree bind accepts anything.
  def can_bind?(subtree)
    true
  end

  private

  # Derives the bind type from the class name: the word in front of 'Bind',
  # in lower case.
  def bind_type_name
    if md = self.class.name.match(/(\w+)Bind/)
      md.captures.first.downcase
    else
      # This path should never be used, but since this is for inspection only,
      # let's not raise.
      'unknown_bind'
    end
  end
end
|
||||
|
||||
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
||||
# elements or a collection of attributes.
|
||||
#
|
||||
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
  # Accepts everything whose class is neither exactly Hash nor exactly Array.
  def can_bind?(subtree)
    ![Hash, Array].include?(subtree.class)
  end
end
|
||||
|
||||
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
||||
#
|
||||
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
  # Accepts arrays none of whose elements is itself a Hash or an Array
  # (judged by exact class); the empty array is accepted as well.
  def can_bind?(subtree)
    subtree.kind_of?(Array) &&
      subtree.none? { |el| [Hash, Array].include?(el.class) }
  end
end
|
59
lib/parslet/rig/rspec.rb
Normal file
59
lib/parslet/rig/rspec.rb
Normal file
@ -0,0 +1,59 @@
|
||||
# RSpec matcher: expect(parser).to parse(input), optionally followed by
# .as(expected_output) or .as { |result| ... }. Passing :trace => true as
# second argument adds the ascii error tree to the failure message.
RSpec::Matchers.define(:parse) do |input, opts|
  # State shared between the blocks below.
  as = block = nil
  result = trace = nil

  unless self.respond_to? :failure_message # if RSpec 2.x
    class << self
      alias_method :failure_message, :failure_message_for_should
      alias_method :failure_message_when_negated, :failure_message_for_should_not
    end
  end

  match do |parser|
    begin
      result = parser.parse(input)
      # With a block the block decides; otherwise the result has to equal
      # the expected output, if one was given at all.
      block ?
        block.call(result) :
        (as == result || as.nil?)
    rescue Parslet::ParseFailed => ex
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  failure_message do |is|
    if block
      "expected output of parsing #{input.inspect}" \
        " with #{is.inspect} to meet block conditions, but it didn't"
    else
      expectation =
        if as
          "output of parsing #{input.inspect}" \
            " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}"
        else
          "#{is.inspect} to be able to parse #{input.inspect}"
        end

      "expected " + expectation + (trace ? "\n" + trace : '')
    end
  end

  failure_message_when_negated do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      "expected " +
        (as ?
          "output of parsing #{input.inspect}" \
            " with #{is.inspect} not to equal #{as.inspect}" :
          "#{is.inspect} to not parse #{input.inspect}, but it did")
    end
  end

  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  #
  # The block parameter has its own name on purpose: a parameter called
  # +block+ would shadow the outer variable, and the assignment below would
  # never reach the matcher's state. The expected output is optional so that
  # the block-only form can be used.
  chain :as do |expected_output=nil, &given_block|
    as = expected_output
    block = given_block
  end
end
|
42
lib/parslet/scope.rb
Normal file
42
lib/parslet/scope.rb
Normal file
@ -0,0 +1,42 @@
|
||||
# A stack of key/value bindings: lookups fall back to the enclosing levels,
# assignments always go to the innermost level.
class Parslet::Scope
  # Raised when the accessed slot has never been assigned a value.
  #
  class NotFound < StandardError
  end

  # One level of the stack, linked to the level it was pushed onto.
  class Binding
    attr_reader :parent

    def initialize(parent=nil)
      @parent = parent
      @hash = Hash.new
    end

    # Looks +k+ up at this level first, then in the enclosing levels.
    #
    # The decision is based on the presence of the key, not on the stored
    # value, so nil and false can be stored and read back like any other
    # value.
    #
    # @raise [NotFound] if no level has a value for +k+
    def [](k)
      return @hash[k] if @hash.has_key?(k)
      return parent[k] if parent

      raise NotFound
    end

    def []=(k, v)
      @hash.store(k, v)
    end
  end

  def [](k)
    @current[k]
  end

  def []=(k, v)
    @current[k] = v
  end

  def initialize
    @current = Binding.new
  end

  # Opens a new innermost level.
  def push
    @current = Binding.new(@current)
  end

  # Drops the innermost level. There is no guard for popping more often than
  # pushing: the current level then becomes nil.
  def pop
    @current = @current.parent
  end
end
|
101
lib/parslet/slice.rb
Normal file
101
lib/parslet/slice.rb
Normal file
@ -0,0 +1,101 @@
|
||||
|
||||
# A slice is a small part from the parse input. A slice mainly behaves like
|
||||
# any other string, except that it remembers where it came from (offset in
|
||||
# original input).
|
||||
#
|
||||
# == Extracting line and column
|
||||
#
|
||||
# Using the #line_and_column method, you can extract the line and column in
|
||||
# the original input where this slice starts.
|
||||
#
|
||||
# Example:
|
||||
# slice.line_and_column # => [1, 13]
|
||||
# slice.offset # => 12
|
||||
#
|
||||
# == Likeness to strings
|
||||
#
|
||||
# Parslet::Slice behaves in many ways like a Ruby String. This likeness
|
||||
# however is not complete - many of the myriad of operations String supports
|
||||
# are not yet in Slice. You can always extract the internal string instance by
|
||||
# calling #to_s.
|
||||
#
|
||||
# These omissions are somewhat intentional. Rather than maintaining a full
|
||||
# delegation, we opt for a partial emulation that gets the job done.
|
||||
#
|
||||
module Parslet
  class Slice
    attr_reader :str, :offset, :line_cache

    # Construct a slice using a string, an offset and an optional line cache.
    # The line cache should be able to answer to the #line_and_column message.
    #
    def initialize(string, offset, line_cache=nil)
      @str = string
      @offset = offset
      @line_cache = line_cache
    end

    # Compares slices to other slices or strings. Only the text is compared;
    # the offset plays no role.
    #
    def ==(other)
      str == other
    end

    # Match regular expressions against the text of the slice.
    #
    def match(regexp)
      str.match(regexp)
    end

    # Returns the slice's size in characters.
    #
    def size
      str.size
    end

    # Concatenate two slices; it is assumed that the second slice begins
    # where the first one ends. The result keeps the offset and the line
    # cache of this slice.
    #
    def +(other)
      combined = str + other.to_s
      self.class.new(combined, offset, line_cache)
    end

    # Returns a <line, column> tuple referring to the original input.
    #
    # Raises ArgumentError if the slice was constructed without a line cache.
    #
    def line_and_column
      unless line_cache
        raise ArgumentError, "No line cache was given, cannot infer line and column."
      end

      line_cache.line_and_column(offset)
    end


    # Conversion operators -----------------------------------------------------

    # Returns the wrapped string itself (not a copy).
    def to_str
      str
    end
    alias_method :to_s, :to_str

    def to_slice
      self
    end

    def to_sym
      str.to_sym
    end

    # Strict conversion: raises if the text is not a valid integer literal.
    def to_int
      Integer(str)
    end

    # Lenient conversion, same rules as String#to_i.
    def to_i
      str.to_i
    end

    def to_f
      str.to_f
    end

    # Inspection & Debugging ---------------------------------------------------

    # Prints the slice as <code>"string"@offset</code>.
    def inspect
      "#{str.inspect}@#{offset}"
    end
  end
end
|
87
lib/parslet/source.rb
Normal file
87
lib/parslet/source.rb
Normal file
@ -0,0 +1,87 @@
|
||||
|
||||
require 'stringio'
|
||||
require 'strscan'
|
||||
|
||||
require 'parslet/source/line_cache'
|
||||
|
||||
module Parslet
  # Wraps the input string for parslet. Keeps a StringScanner over the input
  # and a LineCache that translates positions into line/column pairs.
  #
  class Source
    # @param str [#to_str] the input to parse
    # @raise [ArgumentError] if str does not respond to #to_str
    #
    def initialize(str)
      unless str.respond_to?(:to_str)
        raise ArgumentError, "Must construct Source with a string like object."
      end

      @str = StringScanner.new(str)

      # Lazily built regexps, keyed by length: 1 => /./m, 2 => /../m, etc...
      # The '|$' alternative lets the pattern still match when fewer than
      # k characters are left in the input.
      @re_cache = Hash.new { |cache, len| cache[len] = /(.|$){#{len}}/m }

      @line_cache = LineCache.new
      @line_cache.scan_for_line_endings(0, str)
    end

    # Checks if the given pattern matches at the current input position. The
    # position is not advanced.
    #
    # @param pattern [Regexp] pattern to check for
    # @return [Integer, nil] whatever StringScanner#match? returns: the
    #   length of the match, or nil if the pattern does not match at #pos.
    #   Use it as a truthy/falsy value; it is not a strict Boolean.
    #
    def matches?(pattern)
      @str.match?(pattern)
    end
    alias match matches?

    # Consumes n characters from the input, returning them as a slice of the
    # input. The slice remembers the position at which it started.
    #
    def consume(n)
      start = @str.pos
      text = @str.scan(@re_cache[n])

      Parslet::Slice.new(text, start, @line_cache)
    end

    # Returns how much input remains after the current position.
    #
    # NOTE(review): this is StringScanner#rest_size, which counts bytes; for
    # multi-byte input it is not the number of characters - confirm callers.
    #
    def chars_left
      @str.rest_size
    end

    # Returns how many chars there are between current position and the
    # string given. If the string given doesn't occur in the source, then
    # the remaining chars (#chars_left) are returned. The position is not
    # advanced.
    #
    # @return [Fixnum] count of chars until str or #chars_left
    #
    def chars_until str
      # up_to_match includes the occurrence of str itself, hence the
      # subtraction below.
      up_to_match = @str.check_until(Regexp.new(Regexp.escape(str)))

      up_to_match ? up_to_match.size - str.size : chars_left
    end

    # Position of the parse as an offset into the original string, as
    # reported by the underlying StringScanner.
    # @note: Encodings...
    def pos
      @str.pos
    end

    # Moves the parse position. A position the scanner rejects with a
    # RangeError is silently ignored and the position stays where it was.
    #
    def pos=(n)
      @str.pos = n
    rescue RangeError
    end

    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position
    # given by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || pos)
    end
  end
end
|
96
lib/parslet/source/line_cache.rb
Normal file
96
lib/parslet/source/line_cache.rb
Normal file
@ -0,0 +1,96 @@
|
||||
|
||||
|
||||
module Parslet
  class Source
    # A cache for line start positions.
    #
    class LineCache
      def initialize
        # Stores line endings as simple position numbers; each entry is the
        # position right after a newline, i.e. where the next line starts.
        # The first line always starts at 0. Positions beyond the biggest
        # entry lie on some line after the last known one and would need a
        # further scan to be resolved exactly.
        @line_ends = []
        @line_ends.extend RangeSearch
      end

      # Returns a <line, column> tuple for the given input position. Both
      # numbers are 1-based.
      #
      def line_and_column(pos)
        eol_idx = @line_ends.lbound(pos)

        if eol_idx.nil?
          # We're beyond the last line end that we know about. Pretend for
          # now that we're just on the last line.
          line_start = @line_ends.last || 0
          [@line_ends.size + 1, pos - line_start + 1]
        else
          # eol_idx points to the entry that ends the current line; the entry
          # before it (or 0 for the first line) is where the line starts.
          line_start = eol_idx > 0 ? @line_ends[eol_idx - 1] : 0
          [eol_idx + 1, pos - line_start + 1]
        end
      end

      # Records the positions of all line endings found in buf. start_pos is
      # the position of the first character of buf within the whole input.
      # Does nothing if buf is nil or contains no newline.
      #
      def scan_for_line_endings(start_pos, buf)
        return unless buf

        scanner = StringScanner.new(buf)
        return unless scanner.exist?(/\n/)

        # If we have already read part or all of buf, we already know about
        # the line ends in that portion; skip it instead of scanning again.
        already_scanned = @last_line_end && start_pos < @last_line_end
        scanner.pos = @last_line_end - start_pos if already_scanned

        # Store the position following each newline in @line_ends.
        while scanner.skip_until(/\n/)
          @last_line_end = start_pos + scanner.pos
          @line_ends << @last_line_end
        end
      end
    end

    # Mixin for sorted arrays that implicitly give a number of ranges, where
    # one range begins where the other one ends.
    #
    # Example:
    #
    #   [10, 20, 30]
    #   # describes the ranges 0...10, 10...20 and 20...30: lbound(9) is 0,
    #   # lbound(10) is 1 and lbound(30) is nil.
    #
    module RangeSearch
      # Index halfway between left and right, rounded down.
      #
      def find_mid(left, right)
        # NOTE: Jonathan Hinkle reported that when mathn is required, just
        # dividing and relying on the integer truncation is not enough.
        half = ((right - left) / 2).floor
        left + half
      end

      # Scans the array for the first number that is greater than bound.
      # Returns the index of that number, or nil if there is none.
      #
      def lbound(bound)
        return nil if empty? || !(last > bound)

        # Binary search. The element at hi is always > bound, so once lo
        # and hi meet, hi is the first such element.
        lo = 0
        hi = size - 1

        while lo < hi
          mid = find_mid(lo, hi)

          if self[mid] > bound
            hi = mid
          else
            lo = mid + 1
          end
        end

        hi
      end
    end
  end
end
|
236
lib/parslet/transform.rb
Normal file
236
lib/parslet/transform.rb
Normal file
@ -0,0 +1,236 @@
|
||||
|
||||
require 'parslet/pattern'
|
||||
|
||||
# Transforms an expression tree into something else. The transformation
|
||||
# performs a depth-first, post-order traversal of the expression tree. During
|
||||
# that traversal, each time a rule matches a node, the node is replaced by the
|
||||
# result of the block associated to the rule. Otherwise the node is accepted
|
||||
# as is into the result tree.
|
||||
#
|
||||
# This is almost what you would generally do with a tree visitor, except that
|
||||
# you can match several levels of the tree at once.
|
||||
#
|
||||
# As a consequence of this, the resulting tree will contain pieces of the
|
||||
# original tree and new pieces. Most likely, you will want to transform the
|
||||
# original tree wholly, so this isn't a problem.
|
||||
#
|
||||
# You will not be able to create a loop, given that each node will be replaced
|
||||
# only once and then left alone. This means that the results of a replacement
|
||||
# will not be acted upon.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# class Example < Parslet::Transform
|
||||
# rule(:string => simple(:x)) { # (1)
|
||||
# StringLiteral.new(x)
|
||||
# }
|
||||
# end
|
||||
#
|
||||
# A tree transform (Parslet::Transform) is defined by a set of rules. Each
|
||||
# rule can be defined by calling #rule with the pattern as argument. The block
|
||||
# given will be called every time the rule matches somewhere in the tree given
|
||||
# to #apply. It is passed a Hash containing all the variable bindings of this
|
||||
# pattern match.
|
||||
#
|
||||
# In the above example, (1) illustrates a simple matching rule.
|
||||
#
|
||||
# Let's say you want to parse matching parentheses and distill a maximum nest
|
||||
# depth. You would probably write a parser like the one in example/parens.rb;
|
||||
# here's the relevant part:
|
||||
#
|
||||
# rule(:balanced) {
|
||||
# str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
||||
# }
|
||||
#
|
||||
# If you now apply this to a string like '(())', you get an intermediate parse
|
||||
# tree that looks like this:
|
||||
#
|
||||
# {
|
||||
# l: '(',
|
||||
# m: {
|
||||
# l: '(',
|
||||
# m: nil,
|
||||
# r: ')'
|
||||
# },
|
||||
# r: ')'
|
||||
# }
|
||||
#
|
||||
# This parse tree is good for debugging, but what we would really like to have
|
||||
# is just the nesting depth. This transformation rule will produce that:
|
||||
#
|
||||
# rule(:l => '(', :m => simple(:x), :r => ')') {
|
||||
# # innermost :m will contain nil
|
||||
# x.nil? ? 1 : x+1
|
||||
# }
|
||||
#
|
||||
# = Usage patterns
|
||||
#
|
||||
# There are four ways of using this class. The first one is very much
|
||||
# recommended, followed by the second one for generality. The other ones are
|
||||
# omitted here.
|
||||
#
|
||||
# Recommended usage is as follows:
|
||||
#
|
||||
# class MyTransformator < Parslet::Transform
|
||||
# rule(...) { ... }
|
||||
# rule(...) { ... }
|
||||
# # ...
|
||||
# end
|
||||
# MyTransformator.new.apply(tree)
|
||||
#
|
||||
# Alternatively, you can use the Transform class as follows:
|
||||
#
|
||||
# transform = Parslet::Transform.new do
|
||||
# rule(...) { ... }
|
||||
# end
|
||||
# transform.apply(tree)
|
||||
#
|
||||
# = Execution context
|
||||
#
|
||||
# The execution context of action blocks differs depending on the arity of
|
||||
# said blocks. This can be confusing. It is however somewhat intentional. You
|
||||
# should not create fat Transform descendants containing a lot of helper methods,
|
||||
# instead keep your AST class construction in global scope or make it available
|
||||
# through a factory. The following piece of code illustrates usage of global
|
||||
# scope:
|
||||
#
|
||||
# transform = Parslet::Transform.new do
|
||||
# rule(...) { AstNode.new(a_variable) }
|
||||
# rule(...) { Ast.node(a_variable) } # modules are nice
|
||||
# end
|
||||
# transform.apply(tree)
|
||||
#
|
||||
# And here's how you would use a class builder (a factory):
|
||||
#
|
||||
# transform = Parslet::Transform.new do
|
||||
# rule(...) { builder.add_node(a_variable) }
|
||||
# rule(...) { |d| d[:builder].add_node(d[:a_variable]) }
|
||||
# end
|
||||
# transform.apply(tree, :builder => Builder.new)
|
||||
#
|
||||
# As you can see, Transform allows you to inject local context for your rule
|
||||
# action blocks to use.
|
||||
#
|
||||
module Parslet
  class Transform
    # FIXME: Maybe only part of it? Or maybe only include into constructor
    # context?
    include Parslet

    class << self
      # FIXME: Only do this for subclasses?
      include Parslet

      # Define a rule for the transform subclass. The rule is stored on the
      # class it is defined on, as a [pattern, block] pair.
      #
      def rule(expression, &block)
        (@__transform_rules ||= []) << [Parslet::Pattern.new(expression), block]
      end

      # Allows accessing the class' rules; an empty array if none were
      # defined.
      #
      def rules
        @__transform_rules || []
      end
    end

    # If a block is given, it is evaluated in the context of the new
    # instance, so #rule can be called in it without a receiver.
    #
    def initialize(&block)
      @rules = []

      instance_eval(&block) if block
    end

    # Defines a rule to be applied whenever apply is called on a tree. A rule
    # is composed of two parts:
    #
    # * an *expression pattern*
    # * a *transformation block*
    #
    def rule(expression, &block)
      @rules << [Parslet::Pattern.new(expression), block]
    end

    # Applies the transformation to a tree that is generated by Parslet::Parser
    # or a simple parslet. Transformation will proceed down the tree, replacing
    # parts/all of it with new objects. The resulting object will be returned.
    #
    # Children of hashes and arrays are transformed first; the rules are then
    # tried on the node that holds the transformed children.
    #
    def apply(obj, context=nil)
      descended =
        case obj
        when Hash  then recurse_hash(obj, context)
        when Array then recurse_array(obj, context)
        else obj
        end

      transform_elt(descended, context)
    end

    # Executes the block on the bindings obtained by Pattern#match, if such a match
    # can be made. Depending on the arity of the given block, it is called in
    # one of two environments: the current one or a clean toplevel environment.
    #
    # If you would like the current environment preserved, please use the
    # arity 1 variant of the block. Alternatively, you can inject a context object
    # and call methods on it (think :ctx => self).
    #
    #   # the local variable a is simulated
    #   t.call_on_match(:a => :b) { a }
    #   # no change of environment here
    #   t.call_on_match(:a => :b) { |d| d[:a] }
    #
    # Returns nil when no block is given.
    #
    def call_on_match(bindings, block)
      return nil unless block
      return block.call(bindings) if block.arity == 1

      Context.new(bindings).instance_eval(&block)
    end

    # Allow easy access to all rules, the ones defined in the instance and the
    # ones predefined in a subclass definition. Class level rules come first
    # and therefore take precedence when several rules match.
    #
    def rules
      self.class.rules + @rules
    end

    # Replaces elt by the result of the first rule whose pattern matches it.
    #
    # @api private
    #
    def transform_elt(elt, context)
      rules.each do |pattern, action|
        bindings = pattern.match(elt, context)

        # Produces transformed value
        return call_on_match(bindings, action) if bindings
      end

      # No rule matched - element is not transformed
      elt
    end

    # Builds a new hash with the same keys and transformed values.
    #
    # @api private
    #
    def recurse_hash(hsh, ctx)
      transformed = {}
      hsh.each { |key, value| transformed[key] = apply(value, ctx) }
      transformed
    end

    # Builds a new array of transformed elements.
    #
    # @api private
    #
    def recurse_array(ary, ctx)
      ary.map { |elt| apply(elt, ctx) }
    end
  end
end
|
||||
|
||||
require 'parslet/context'
|
Loading…
Reference in New Issue
Block a user