vendored parslet, deemed stable enough and better without dependency

This commit is contained in:
Torsten Ruger 2014-04-27 15:34:35 +03:00
parent 6fafeda66d
commit b1203363d4
42 changed files with 3415 additions and 2 deletions

View File

@ -1,5 +1,3 @@
# parslet is assumed to be checked out at the same level as crystal for now
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', ".." , "parslet",'lib'))
require 'parslet'
require "asm/program"

302
lib/parslet.rb Normal file
View File

@ -0,0 +1,302 @@
# A simple parser generator library. Typical usage would look like this:
#
# require 'parslet'
#
# class MyParser < Parslet::Parser
# rule(:a) { str('a').repeat }
# root(:a)
# end
#
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
# pp MyParser.new.parse('bbbb') # => Parslet::ParseFailed:
# # Don't know what to do with bbbb at line 1 char 1.
#
# The simple DSL allows you to define grammars in PEG-style. This kind of
# grammar construction does away with the ambiguities that usually come with
# parsers; instead, it allows you to construct grammars that are easier to
# debug, since less magic is involved.
#
# Parslet is typically used in stages:
#
#
# * Parsing the input string; this yields an intermediary tree, see
# Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and
# Parslet::ClassMethods#root.
# * Transformation of the tree into something useful to you, see
# Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree.
#
# The first stage is traditionally intermingled with the second stage; output
# from the second stage is usually called the 'Abstract Syntax Tree' or AST.
#
# The stages are completely decoupled; You can change your grammar around and
# use the second stage to isolate the rest of your code from the changes
# you've effected.
#
# == Further reading
#
# All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to
# look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str},
# {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence},
# {Parslet::Atoms::Alternative}.
#
# == When things go wrong
#
# A parse that fails will raise {Parslet::ParseFailed}. This exception contains
# all the details of what went wrong, including a detailed error trace that
# can be printed out as an ascii tree. ({Parslet::Cause})
#
module Parslet
# Hook that Ruby invokes when Parslet is included into a class or module.
# Extends the including class with {Parslet::ClassMethods}, which is what
# makes the class-level +rule+ macro available there.
#
# @param base [Module] the class or module that included Parslet
#
def self.included(base)
base.extend(ClassMethods)
end
# Raised when the parse failed to match. It contains the message that should
# be presented to the user. More details can be extracted from the
# exception's #cause member: It contains an instance of {Parslet::Cause} that
# stores all the details of your failed parse in a tree structure.
#
# begin
# parslet.parse(str)
# rescue Parslet::ParseFailed => failure
# puts failure.cause.ascii_tree
# end
#
# Alternatively, you can just require 'parslet/convenience' and call the
# method #parse_with_debug instead of #parse. This method will never raise
# and print error trees to stdout.
#
# require 'parslet/convenience'
# parslet.parse_with_debug(str)
#
class ParseFailed < StandardError
  # Why the parse failed, exactly as handed to the constructor.
  #
  # @return [Parslet::Cause, nil]
  attr_reader :cause

  # @param message [String] text that should be presented to the user
  # @param cause [Parslet::Cause, nil] structured details of the failure
  def initialize(message, cause=nil)
    @cause = cause
    super(message)
  end
end
module ClassMethods
  # Defines a named grammar rule. An instance method called +name+ is
  # generated; it returns an Atoms::Entity wrapping the rule body and can be
  # used inside other rules like any ordinary Ruby method.
  #
  #   class MyParser
  #     include Parslet
  #
  #     rule(:bar) { str('bar') }
  #     rule(:twobar) do
  #       bar >> bar
  #     end
  #
  #     root :twobar
  #   end
  #
  # @param name [Symbol] name of the rule and of the generated method
  # @param definition [Proc] rule body, evaluated in the context of the
  #   parser instance
  def rule(name, &definition)
    define_method(name) do
      # Per-instance <name, entity> memo: every rule is wrapped only once.
      @rules ||= {}
      @rules.fetch(name) do
        # Bind the rule body to this parser instance so that other rules and
        # plain methods can be called from within it.
        bound_definition = proc { instance_eval(&definition) }
        @rules[name] = Atoms::Entity.new(name, &bound_definition)
      end
    end
  end
end
# Allows for delayed construction of #match. See also Parslet.match.
#
# @api private
class DelayedMatchConstructor
  # Wraps +str+ in square brackets and builds a regexp atom from the
  # resulting character class.
  #
  # @param str [String] body of the character class, without the brackets
  # @return [Parslet::Atoms::Re]
  def [](str)
    character_class = "[" + str + "]"
    Atoms::Re.new(character_class)
  end
end
# Returns an atom matching a character class. All regular expressions can be
# used, as long as they match only a single character at a time.
#
# match('[ab]') # will match either 'a' or 'b'
# match('[\n\s]') # will match newlines and spaces
#
# There is also another (convenience) form of this method:
#
# match['a-z'] # synonymous to match('[a-z]')
# match['\n'] # synonymous to match('[\n]')
#
# @overload match(str)
# @param str [String] character class to match (regexp syntax)
# @return [Parslet::Atoms::Re] a parslet atom
#
def match(str=nil)
  # Without a pattern, hand back the helper that implements match['a-z'].
  str ? Atoms::Re.new(str) : DelayedMatchConstructor.new
end
module_function :match
# Returns an atom matching the +str+ given verbatim:
#
# str('class') # will match 'class'
#
# Declared with module_function, so it can be called as Parslet.str as well
# as from inside classes that include Parslet.
#
# @param str [String] string to match verbatim
# @return [Parslet::Atoms::Str] a parslet atom
#
def str(str)
Atoms::Str.new(str)
end
module_function :str
# Returns an atom matching any character. It acts like the '.' (dot)
# character in regular expressions; the atom is built from the pattern '.'.
#
# any.parse('a') # => 'a'
#
# Declared with module_function, so it can be called as Parslet.any as well.
#
# @return [Parslet::Atoms::Re] a parslet atom
#
def any
Atoms::Re.new('.')
end
module_function :any
# Introduces a new capture scope. This means that all old captures stay
# accessible, but new values stored will only be available during the block
# given and the old values will be restored after the block.
#
# Example:
# # :a will be available until the end of the block. Afterwards,
# # :a from the outer scope will be available again, if such a thing
# # exists.
# scope { str('a').capture(:a) }
#
# @param block [Proc] block describing what is parsed inside the new scope;
# it is handed to Parslet::Atoms::Scope without being called here
# @return [Parslet::Atoms::Scope] a parslet atom
#
def scope(&block)
Parslet::Atoms::Scope.new(block)
end
module_function :scope
# Designates a piece of the parser as being dynamic. Dynamic parsers can
# either return a parser at runtime, which will be applied on the input, or
# return a result from a parse.
#
# Dynamic parse pieces are never cached and can introduce performance
# abnormalities - use sparingly where other constructs fail.
#
# Example:
# # Parses either 'a' or 'b', depending on the weather
# dynamic { rand() < 0.5 ? str('a') : str('b') }
#
# @param block [Proc] block evaluated at parse time; it is handed to
# Parslet::Atoms::Dynamic without being called here
# @return [Parslet::Atoms::Dynamic] a parslet atom
#
def dynamic(&block)
Parslet::Atoms::Dynamic.new(block)
end
module_function :dynamic
# Returns a parslet atom that parses infix expressions. Operations are
# specified as a list of <atom, precedence, associativity> tuples, where
# atom is simply the parslet atom that matches an operator, precedence is
# a number and associativity is either :left or :right.
#
# Higher precedence indicates that the operation should bind tighter than
# other operations with lower precedence. In common algebra, '+' has
# lower precedence than '*'. So you would have a precedence of 1 for '+' and
# a precedence of 2 for '*'. Only the order relation between these two
# counts, so any number would work.
#
# Associativity is what decides what interpretation to take for strings that
# are ambiguous like '1 + 2 + 3'. If '+' is specified as left associative,
# the expression would be interpreted as '(1 + 2) + 3'. If right
# associativity is chosen, it would be interpreted as '1 + (2 + 3)'. Note
# that the hash trees output reflect that choice as well.
#
# Example:
# infix_expression(integer, [add_op, 1, :left])
# # would parse things like '1 + 2'
#
# Each tuple is passed as its own argument; the tuples are collected by the
# splat and forwarded to Parslet::Atoms::Infix as one array.
#
# @param element [Parslet::Atoms::Base] elements that take the NUMBER position
# in the expression
# @param operations [Array<(Parslet::Atoms::Base, Integer, {:left, :right})>]
# @return [Parslet::Atoms::Infix] a parslet atom
#
# @see Parslet::Atoms::Infix
#
def infix_expression(element, *operations)
Parslet::Atoms::Infix.new(element, operations)
end
module_function :infix_expression
# A special kind of atom that allows embedding whole treetop expressions
# into parslet construction.
#
# # the same as str('a') >> str('b').maybe
# exp(%Q("a" "b"?))
#
# The expression string is wrapped in a Parslet::Expression, which is then
# converted with #to_parslet. Parslet::Expression is registered for autoload
# from 'parslet/expression' in this module.
#
# @param str [String] a treetop expression
# @return [Parslet::Atoms::Base] the corresponding parslet parser
#
def exp(str)
Parslet::Expression.new(str).to_parslet
end
module_function :exp
# Returns a placeholder for a tree transformation that will only match a
# sequence of elements. The +symbol+ you specify will be the key for the
# matched sequence in the returned dictionary.
#
# # This would match a body element that contains several declarations.
# { :body => sequence(:declarations) }
#
# The above example would match <code>:body => ['a', 'b']</code>, but not
# <code>:body => 'a'</code>.
#
# see {Parslet::Transform}
#
# @param symbol [Symbol] name the matched sequence is bound to
# @return [Parslet::Pattern::SequenceBind] the placeholder
#
def sequence(symbol)
Pattern::SequenceBind.new(symbol)
end
module_function :sequence
# Returns a placeholder for a tree transformation that will only match
# simple elements. This matches everything that <code>#sequence</code>
# doesn't match.
#
# # Matches a single header.
# { :header => simple(:header) }
#
# see {Parslet::Transform}
#
# @param symbol [Symbol] name the matched element is bound to
# @return [Parslet::Pattern::SimpleBind] the placeholder
#
def simple(symbol)
Pattern::SimpleBind.new(symbol)
end
module_function :simple
# Returns a placeholder for tree transformation patterns that will match
# any kind of subtree.
#
# { :expression => subtree(:exp) }
#
# @param symbol [Symbol] name the matched subtree is bound to
# @return [Parslet::Pattern::SubtreeBind] the placeholder
#
def subtree(symbol)
Pattern::SubtreeBind.new(symbol)
end
module_function :subtree
autoload :Expression, 'parslet/expression'
end
require 'parslet/slice'
require 'parslet/cause'
require 'parslet/source'
require 'parslet/atoms'
require 'parslet/pattern'
require 'parslet/pattern/binding'
require 'parslet/transform'
require 'parslet/parser'
require 'parslet/error_reporter'
require 'parslet/scope'

161
lib/parslet/accelerator.rb Normal file
View File

@ -0,0 +1,161 @@
# Optimizes the parsers by pattern matching on the parser atoms and replacing
# matches with better versions. See the file qed/accelerators.md for a more
# in-depth description.
#
# Example:
# quote = str('"')
# parser = quote >> (quote.absent? >> any).repeat >> quote
#
# A = Accelerator # for making what follows a bit shorter
# optimized_parser = A.apply(parser,
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
#
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
#
module Parslet::Accelerator
# An expression to match against a tree of parser atoms. Normally, an
# expression is produced by Parslet::Accelerator.any,
# Parslet::Accelerator.str or Parslet::Accelerator.re.
#
# Expressions can be chained much like parslet atoms can be:
#
# expr.repeat(1) # matching repetition
# expr.absent? # matching absent?
# expr.present? # matching present?
# expr1 >> expr2 # matching a sequence
# expr1 | expr2 # matching an alternation
#
# @see Parslet::Accelerator.str
# @see Parslet::Accelerator.re
# @see Parslet::Accelerator.any
#
# @see Parslet::Accelerator
#
class Expression
  # @return [Symbol] kind of expression (:str, :re, :seq, :alt, :absent,
  #   :present, :rep or :as)
  attr_reader :type

  # @return [Array] operands of the expression; their meaning depends on
  #   {#type}
  attr_reader :args

  # @param type [Symbol] kind of expression
  # @param args [Array] operands (sub-expressions, bind variables, bounds)
  def initialize(type, *args)
    @type = type
    @args = args
  end

  # Matches a sequence: this expression followed by +other_expr+.
  #
  # @return [Expression]
  def >>(other_expr)
    join_or_new :seq, other_expr
  end

  # Matches an alternation of this expression and +other_expr+.
  #
  # @return [Expression]
  def |(other_expr)
    join_or_new :alt, other_expr
  end

  # Matches a negative lookahead around this expression.
  #
  # @return [Expression]
  def absent?
    Expression.new(:absent, self)
  end

  # Matches a positive lookahead around this expression.
  #
  # @return [Expression]
  def present?
    Expression.new(:present, self)
  end

  # Matches a repetition of this expression with the given bounds.
  #
  # @return [Expression]
  def repeat(min=0, max=nil)
    Expression.new(:rep, min, max, self)
  end

  # Matches a named atom. Only +name+ is recorded; the receiver is not part
  # of the resulting expression.
  #
  # @return [Expression]
  def as(name)
    Expression.new(:as, name)
  end

  # Appends +other_expr+ to this expression if it already has the type
  # +tag+, otherwise wraps both in a fresh expression of that type. This
  # keeps `a >> b >> c` one flat :seq instead of a nested tree.
  #
  # Always returns an Expression. Appending used to return the raw argument
  # Array (the value of `@args << other_expr`), which turned any chain of
  # three or more operands into a plain Array.
  #
  # @api private
  # @return [Expression]
  def join_or_new(tag, other_expr)
    return Expression.new(tag, self, other_expr) unless type == tag

    @args << other_expr
    self
  end
end
module_function
# Returns a match expression that will match `str` parslet atoms.
#
# The arguments become the bind conditions of the expression. When matching,
# a Symbol is treated as a variable that gets bound to the atom's string;
# any other value is compared against it with ===.
#
# @param variable [Symbol, Object] first bind condition
# @param constraints [Array] further bind conditions, all of which must hold
# @return [Parslet::Accelerator::Expression]
#
def str variable, *constraints
Expression.new(:str, variable, *constraints)
end
# Returns a match expression that will match `match` parslet atoms.
#
# The arguments become the bind conditions of the expression. When matching,
# a Symbol is treated as a variable that gets bound to the atom's pattern;
# any other value is compared against it with ===.
#
# @param variable [Symbol, Object] first bind condition
# @param constraints [Array] further bind conditions, all of which must hold
# @return [Parslet::Accelerator::Expression]
#
def re variable, *constraints
Expression.new(:re, variable, *constraints)
end
# Returns a match expression that will match `any` parslet atoms. This is a
# :re expression whose only bind condition is the literal pattern ".".
#
# @return [Parslet::Accelerator::Expression]
#
def any
Expression.new(:re, ".")
end
# Given a parslet atom and an expression, will determine if the expression
# matches the atom. If successful, returns the bindings into the pattern
# that were made. If no bindings had to be made to make the match successful,
# the empty hash is returned.
#
# @param atom [Parslet::Atoms::Base] parslet atom to match against
# @param expr [Parslet::Accelerator::Expression] expression to match
# @return [nil, Hash] bindings for the match, nil on failure
#
def match atom, expr
  # A fresh engine per call keeps bindings from leaking between matches.
  engine = Engine.new
  engine.match(atom, expr) ? engine.bindings : nil
end
# Constructs an accelerator rule. A rule is a matching expression and the
# code that should be executed once the expression could be bound to a
# parser.
#
# Example:
# Accelerator.rule(Accelerator.any) { Parslet.match('.') }
#
# @param expression [Parslet::Accelerator::Expression] pattern to match
# @param action [Proc] block that produces the replacement atom
# @return [Array<(Parslet::Accelerator::Expression, Proc)>] the rule, a plain
# two-element array of expression and action
#
def rule expression, &action
[expression, action]
end
# Given a parslet atom and a set of rules, tries to match the rules
# recursively through the parslet atom. Once a rule could be matched,
# its action block will be called.
#
# Example:
# quote = str('"')
# parser = quote >> (quote.absent? >> any).repeat >> quote
#
# A = Accelerator # for making what follows a bit shorter
# optimized_parser = A.apply(parser,
# A.rule( (A.str(:x).absent? >> A.any).repeat ) { GobbleUp.new(x) })
#
# optimized_parser.parse('"Parsing is now fully optimized! (tm)"')
#
# @param atom [Parslet::Atoms::Base] a parser to optimize
# @param rules [Array<(Parslet::Accelerator::Expression, Proc)>] rules
# produced by .rule, passed as separate arguments
# @return [Parslet::Atoms::Base] optimized parser
#
def apply atom, *rules
Application.new(atom, rules).call
end
end
require 'parslet/accelerator/engine'
require 'parslet/accelerator/application'

View File

@ -0,0 +1,62 @@
# @api private
module Parslet::Accelerator
# Visitor that rebuilds a parslet atom tree bottom-up and gives every rule
# the chance to replace each rebuilt atom.
class Application
  # @param atom [Parslet::Atoms::Base] root of the tree to optimize
  # @param rules [Array<(Expression, Proc)>] rules as built by
  #   Parslet::Accelerator.rule
  def initialize atom, rules
    @atom = atom
    @rules = rules
  end

  # Runs the optimization.
  #
  # @return [Object] the atom (or rule result) replacing the root
  def call
    @atom.accept(self)
  end

  def visit_parser(root)
    transform root.accept(self)
  end

  def visit_entity(name, block)
    # The entity body stays lazy: it is only visited when the new entity's
    # block is called.
    transform Parslet::Atoms::Entity.new(name) { block.call.accept(self) }
  end

  def visit_named(name, atom)
    transform Parslet::Atoms::Named.new(atom.accept(self), name)
  end

  def visit_repetition(tag, min, max, atom)
    transform Parslet::Atoms::Repetition.new(atom.accept(self), min, max, tag)
  end

  def visit_alternative(alternatives)
    transform Parslet::Atoms::Alternative.new(
      *alternatives.map { |atom| atom.accept(self) })
  end

  def visit_sequence(sequence)
    transform Parslet::Atoms::Sequence.new(
      *sequence.map { |atom| atom.accept(self) })
  end

  def visit_lookahead(positive, atom)
    # Recurse into the child like every other composite visitor does;
    # otherwise rules could never apply inside absent?/present?.
    transform Parslet::Atoms::Lookahead.new(atom.accept(self), positive)
  end

  def visit_re(regexp)
    transform Parslet::Atoms::Re.new(regexp)
  end

  def visit_str(str)
    transform Parslet::Atoms::Str.new(str)
  end

  # Tries the rules in order against +atom+. The first rule that matches has
  # its action evaluated in a Parslet::Context built from the bindings, and
  # that result replaces the atom.
  #
  # @param atom [Parslet::Atoms::Base] freshly rebuilt atom
  # @return [Object] result of the first matching rule, or +atom+ itself
  def transform atom
    @rules.each do |expr, action|
      bindings = Parslet::Accelerator.match(atom, expr)
      next unless bindings

      return Parslet::Context.new(bindings).instance_eval(&action)
    end

    # If no rule matches, this is the fallback - a clean new parslet atom.
    atom
  end
end
end
require 'parslet/context'

View File

@ -0,0 +1,112 @@
require 'parslet/atoms/visitor'
module Parslet::Accelerator
# @api private
# Visitor that checks one accelerator expression against one parslet atom.
# Every visit_* method returns a truthy value on a match, false on a mismatch
# and nil when the expression is of a different type than the visited atom.
class Apply
  # @param engine [Engine] engine used for nested matches and bindings
  # @param expr [Expression] expression to compare the visited atom to
  def initialize(engine, expr)
    @engine = engine
    @expr = expr
  end

  # Whole parsers are never matched by an expression.
  def visit_parser(root)
    false
  end

  # Entities are never matched by an expression.
  def visit_entity(name, block)
    false
  end

  def visit_named(name, atom)
    match(:as) { |key| @engine.try_bind(key, name) }
  end

  def visit_repetition(tag, min, max, atom)
    match(:rep) do |wanted_min, wanted_max, inner|
      wanted_min == min && wanted_max == max && @engine.match(atom, inner)
    end
  end

  def visit_alternative(alternatives)
    match_pairwise(:alt, alternatives)
  end

  def visit_sequence(sequence)
    match_pairwise(:seq, sequence)
  end

  def visit_lookahead(positive, atom)
    [[:absent, false], [:present, true]].each do |tag, polarity|
      match(tag) do |inner|
        return positive == polarity && @engine.match(atom, inner)
      end
    end
    nil
  end

  def visit_re(regexp)
    bind_all(:re, regexp)
  end

  def visit_str(str)
    bind_all(:str, str)
  end

  # Yields the expression's arguments if the expression has the given type.
  #
  # @return [Object, nil] the block's value, nil if the type differs
  def match(type_tag)
    yield(*@expr.args) if @expr.type == type_tag
  end

  private

  # Matches child atoms against the expression's arguments one by one; the
  # number of children and arguments has to agree.
  def match_pairwise(tag, atoms)
    match(tag) do |*expressions|
      next false unless atoms.size == expressions.size

      atoms.zip(expressions).all? { |atom, expr| @engine.match(atom, expr) }
    end
  end

  # Requires every bind condition of the expression to accept +value+.
  def bind_all(tag, value)
    match(tag) do |*bind_conditions|
      bind_conditions.all? { |condition| @engine.try_bind(condition, value) }
    end
  end
end
# @api private
# Holds the variable bindings collected during one expression match and
# dispatches the comparison of atoms against expressions.
class Engine
  # @return [Hash] variables bound so far
  attr_reader :bindings

  def initialize
    @bindings = {}
  end

  # Compares +atom+ to +expr+ by sending the atom an Apply visitor.
  def match(atom, expr)
    visitor = Apply.new(self, expr)
    atom.accept(visitor)
  end

  # Checks +value+ against +variable+:
  #
  # * an already bound variable has to equal +value+,
  # * an unbound Symbol gets bound to +value+ (the value is returned),
  # * anything else is not treated as a variable and is compared to
  #   +value+ using ===.
  def try_bind(variable, value)
    return value == lookup(variable) if bound?(variable)
    return bind(variable, value) if Symbol === variable

    variable === value
  end

  def bound?(var)
    @bindings.key?(var)
  end

  def lookup(var)
    @bindings[var]
  end

  def bind(var, val)
    @bindings[var] = val
  end
end
end

35
lib/parslet/atoms.rb Normal file
View File

@ -0,0 +1,35 @@
# This is where parslet's name comes from: Small parser atoms.
#
module Parslet::Atoms
# The precedence module controls parenthesis during the #inspect printing
# of parslets. It is not relevant to other aspects of the parsing.
#
module Precedence
  # Levels are consecutive integers starting at 1; only their relative order
  # is used when deciding about parentheses in the printed form.
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
require 'parslet/atoms/can_flatten'
require 'parslet/atoms/context'
require 'parslet/atoms/dsl'
require 'parslet/atoms/base'
require 'parslet/atoms/named'
require 'parslet/atoms/lookahead'
require 'parslet/atoms/alternative'
require 'parslet/atoms/sequence'
require 'parslet/atoms/repetition'
require 'parslet/atoms/re'
require 'parslet/atoms/str'
require 'parslet/atoms/entity'
require 'parslet/atoms/capture'
require 'parslet/atoms/dynamic'
require 'parslet/atoms/scope'
require 'parslet/atoms/infix'
end

View File

@ -0,0 +1,50 @@
# Alternative during matching. Contains a list of parslets that is tried each
# one in turn. Only fails if all alternatives fail.
#
# Example:
#
# str('a') | str('b') # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  # @return [Array<Parslet::Atoms::Base>] the branches, in trial order
  attr_reader :alternatives

  # Builds an Alternative trying the given parslets in the order given. This
  # is what calling '|' on existing parslets produces:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  #---
  # Adding a branch yields one flat Alternative holding all branches rather
  # than a nested tree of two-branch Alternatives.
  #+++
  def |(parslet)
    self.class.new(*@alternatives, parslet)
  end

  # Applies the branches in order and returns the first successful result.
  # If every branch fails, an error carrying all branch failures is reported.
  def try(source, context, consume_all)
    failures = []

    alternatives.each do |branch|
      outcome = branch.apply(source, context, consume_all)
      succeeded, failure = outcome
      return outcome if succeeded

      failures << failure
    end

    # If we reach this point, all alternatives have failed.
    context.err(self, source, @error_msg, failures)
  end

  precedence ALTERNATE

  def to_s_inner(prec)
    alternatives.map { |branch| branch.to_s(prec) }.join(' / ')
  end
end

151
lib/parslet/atoms/base.rb Normal file
View File

@ -0,0 +1,151 @@
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
#
# Common superclass of the parslet atoms: drives a parse from the outside
# (#parse), wraps the subclass-specific #try with caching and input rewinding
# (#apply) and provides precedence-aware debug printing (#to_s, #inspect).
class Parslet::Atoms::Base
include Parslet::Atoms::Precedence
include Parslet::Atoms::DSL
include Parslet::Atoms::CanFlatten
# Given a string or a source object, this will attempt a parse of its contents
# and return a result. If the parse fails, a Parslet::ParseFailed exception
# will be thrown.
#
# Anything responding to #line_and_column is used as the source directly;
# everything else is wrapped in a Parslet::Source.
#
# @param io [String, Source] input for the parse process
# @option options [Parslet::ErrorReporter] :reporter error reporter to use,
# defaults to Parslet::ErrorReporter::Tree
# @option options [Boolean] :prefix Should a prefix match be accepted?
# (default: false)
# @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
# tree
#
def parse(io, options={})
source = io.respond_to?(:line_and_column) ?
io :
Parslet::Source.new(io)
# Try to cheat. Assuming that we'll be able to parse the input, don't
# run error reporting code: the first pass runs without a reporter (nil).
success, value = setup_and_apply(source, nil, !options[:prefix])
# If we didn't succeed the parse, raise an exception for the user.
# Stack trace will be off, but the error tree should explain the reason
# it failed.
unless success
# Cheating has not paid off. Now pay the cost: Rerun the parse,
# gathering error information in the process.
reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
# NOTE(review): the rerun always restarts at position 0, even if a source
# that was already advanced was passed in - confirm this is intended.
source.pos = 0
success, value = setup_and_apply(source, reporter, !options[:prefix])
fail "Assertion failed: success was true when parsing with reporter" \
if success
# Value is a Parslet::Cause, which can be turned into an exception:
value.raise
fail "NEVER REACHED"
end
# assert: success is true
# Extra input is now handled inline with the rest of the parsing. If
# really we have success == true, prefix: false and still some input
# is left dangling, that is a BUG.
if !options[:prefix] && source.chars_left > 0
fail "BUG: New error strategy should not reach this point."
end
return flatten(value)
end
# Creates a context for parsing and applies the current atom to the input.
# Returns the parse result.
#
# @param source [Parslet::Source] source to read input from
# @param error_reporter [#err, #err_at, nil] reporter handed to the new
# context; #parse passes nil for its first, report-free attempt
# @param consume_all [Boolean] true if the parse must consume all input
# @return [<Boolean, Object>] Result of the parse. If the first member is
# true, the parse has succeeded.
def setup_and_apply(source, error_reporter, consume_all)
context = Parslet::Atoms::Context.new(error_reporter)
apply(source, context, consume_all)
end
# Calls the #try method of this parslet (through the context's cache).
# Success consumes input, error will rewind the input.
#
# @param source [Parslet::Source] source to read input from
# @param context [Parslet::Atoms::Context] context to use for the parsing
# @param consume_all [Boolean] true if the current parse must consume
# all input by itself.
# @return [<Boolean, Object>] the result tuple; on failure the source
# position is back where it was before the call
def apply(source, context, consume_all=false)
# Remember where we started so that a failure can rewind the input.
old_pos = source.pos
success, value = result = context.try_with_cache(self, source, consume_all)
if success
# If a consume_all parse was made and doesn't result in the consumption
# of all the input, that is considered an error.
if consume_all && source.chars_left>0
# Read 10 characters ahead. Why ten? I don't know.
offending_pos = source.pos
offending_input = source.consume(10)
# Rewind input (as happens always in error case)
source.pos = old_pos
return context.err_at(
self,
source,
"Don't know what to do with #{offending_input.to_s.inspect}",
offending_pos
)
end
# Looks like the parse was successful after all. Don't rewind the input.
return result
end
# We only reach this point if the parse has failed. Rewind the input.
source.pos = old_pos
return result
end
# Override this in your Atoms::Base subclasses to implement parsing
# behaviour. The default implementation always raises.
#
# @raise [NotImplementedError] unless overridden
#
def try(source, context, consume_all)
raise NotImplementedError, \
"Atoms::Base doesn't have behaviour, please implement #try(source, context)."
end
# Returns true if this atom can be cached in the packrat cache. Most parslet
# atoms are cached, so this always returns true, unless overridden.
#
def cached?
true
end
# Debug printing - in Treetop syntax.
#
# Class-level macro: defines the instance method #precedence so that it
# returns the given level. Subclasses call it in their class body.
#
def self.precedence(prec)
define_method(:precedence) { prec }
end
precedence BASE
# Renders the atom for debugging. The inner representation is wrapped in
# parentheses when the enclosing precedence level is lower than this atom's
# own level.
#
# @param outer_prec [Integer] precedence level of the enclosing construct
# @return [String]
def to_s(outer_prec=OUTER)
if outer_prec < precedence
"("+to_s_inner(precedence)+")"
else
to_s_inner(precedence)
end
end
# Same as #to_s at the outermost precedence level.
def inspect
to_s(OUTER)
end
private
# Wraps a result into the success tuple [true, result] and returns it.
#
def succ(result)
[true, result]
end
end

View File

@ -0,0 +1,137 @@
module Parslet::Atoms
# A series of helper functions that have the common topic of flattening
# result values into the intermediary tree that consists of Ruby Hashes and
# Arrays.
#
# This module has one main function, #flatten, that takes an annotated
# structure as input and returns the reduced form that users expect from
# Atom#parse.
#
# NOTE: Since all of these functions are just that, functions without
# side effects, they are in a module and not in a class. It's hard to draw
# the line sometimes, but this is beyond.
#
module CanFlatten
  # Converts the annotated value coming out of a parslet into the value
  # returned to the user, dropping and merging things along the way.
  #
  # Annotated values are arrays of the form [tag, *children] with tag being
  # :sequence, :maybe or :repetition; everything else is passed through.
  #
  # +named+ is true if the result will be embedded in a Hash because it was
  # named using <code>.as(...)</code>. It changes how empty :maybe and
  # :repetition values are folded.
  #
  def flatten(value, named=false)
    # Anything that is not exactly an Array is already a final value.
    return value unless value.instance_of?(Array)

    tag, *children = value
    flat = children.map { |child| flatten(child) }

    case tag
    when :sequence   then flatten_sequence(flat)
    when :maybe      then named ? flat.first : (flat.first || '')
    when :repetition then flatten_repetition(flat, named)
    else fail "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Lisp style left fold that uses the first element as the initial value.
  # An empty list folds to the empty string.
  #
  def foldl(list, &block)
    return '' if list.empty?

    seed, *rest = list
    rest.inject(seed, &block)
  end

  # Flattens the results of a sequence of parslets: nil entries are dropped,
  # the remaining ones are merged pairwise from the left.
  #
  # @api private
  #
  def flatten_sequence(list)
    present = list.compact
    foldl(present) { |merged, element| merge_fold(merged, element) }
  end

  # Merges two neighbouring sequence results into one.
  #
  # @api private
  def merge_fold(l, r)
    # Same class on both sides: hashes are merged (warning about clashing
    # keys), everything else is concatenated.
    if l.class == r.class
      return l + r unless l.is_a?(Hash)

      warn_about_duplicate_keys(l, r)
      return l.merge(r)
    end

    left_stringlike = l.respond_to?(:to_str)
    right_stringlike = r.respond_to?(:to_str)

    # Different classes, but both string-like: the slice wins over a plain
    # string, the right-hand side is preferred.
    if left_stringlike && right_stringlike
      return r if r.respond_to?(:to_slice)
      return l if l.respond_to?(:to_slice)

      fail "NOTREACHED: What other stringlike classes are there?"
    end

    # Only one side is a string/slice: the other side is more important.
    return l if right_stringlike
    return r if left_stringlike

    # Otherwise a hash joins the array found on the other side.
    return l + [r] if r.class == Hash
    return [l] + r if l.class == Hash

    fail "Unhandled case when foldr'ing sequence."
  end

  # Flattens the results of repeating a single parslet. +named+ tells
  # whether the user has named the result: a named empty list stays an empty
  # list, an unnamed one turns into an empty string.
  #
  # @api private
  #
  def flatten_repetition(list, named)
    # Keyed subtrees take priority; everything in between is discarded. To
    # keep it, name it.
    hashes = list.select { |element| element.instance_of?(Hash) }
    return hashes unless hashes.empty?

    # Nested arrays are spliced into this level; non-arrays are dropped.
    arrays = list.select { |element| element.instance_of?(Array) }
    return arrays.flatten(1) unless arrays.empty?

    # Consistent handling of empty lists, when we act on a named result.
    return [] if named && list.empty?

    # Only strings are left: concatenate them.
    foldl(list) { |joined, element| joined + element }
  end

  # Source of the warning 'Duplicate subtrees while merging result'. It goes
  # away once the intermediary tree carries more '.as(...)' names.
  #
  def warn_about_duplicate_keys(h1, h2)
    shared = h1.keys & h2.keys
    return if shared.empty?

    warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
      " of the latter will be kept. (keys: #{shared.inspect})"
  end
end
end

View File

@ -0,0 +1,38 @@
# Stores the result of matching an atom against input in the #captures in
# parse context. Doing so will allow you to pull parts of the ongoing parse
# out later and use them to match other pieces of input.
#
# Example:
# # After this, context.captures[:an_a] returns 'a'
# str('a').capture(:an_a)
#
# # Capture and use of the capture: (matches either 'aa' or 'bb')
# match['ab'].capture(:first) >>
# dynamic { |src, ctx| str(ctx.captures[:first]) }
#
class Parslet::Atoms::Capture < Parslet::Atoms::Base
  # @return [Parslet::Atoms::Base] the wrapped parslet
  attr_reader :parslet
  # @return [#to_sym] name the capture is stored under
  attr_reader :name

  # @param parslet [Parslet::Atoms::Base] atom whose result gets captured
  # @param name [#to_sym] key for the capture
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet and, on success, stores its flattened result
  # in the context's captures under +name+. The wrapped parslet's result is
  # returned unchanged either way.
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    succeeded, tree = outcome

    context.captures[name.to_sym] = flatten(tree) if succeeded

    outcome
  end

  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end

View File

@ -0,0 +1,91 @@
module Parslet::Atoms
# Helper class that implements a transient cache that maps position and
# parslet object to results. This is used for memoization in the packrat
# style.
#
# Also, error reporter is stored here and error reporting happens through
# this class. This makes the reporting pluggable.
#
class Context
  # Returns the current captures made on the input (see
  # Parslet::Atoms::Base#capture). Use as follows:
  #
  #   context.captures[:foobar] # => returns capture :foobar
  #
  attr_reader :captures

  # @param reporter [#err, #err_at, nil] error reporter; the default reporter
  #   is used when the argument is omitted. With nil (or false), errors are
  #   reported as [false, nil] without any detail.
  def initialize(reporter=Parslet::ErrorReporter::Tree.new)
    @reporter = reporter
    @captures = Parslet::Scope.new
    # position => { parslet => [result, characters advanced] }
    @cache = Hash.new { |by_position, position| by_position[position] = {} }
  end

  # Applies obj at the current source position, answering from the cache if
  # obj has been tried at that position before. Applying the same parslet at
  # one position of input always yields the same result, unless the input
  # has changed.
  #
  # The source is needed as a whole: replaying a cached answer has to move
  # the input position forward by as much as the original parse did.
  #
  def try_with_cache(obj, source, consume_all)
    start = source.pos
    cached = lookup(obj, start)

    if cached
      # The input has been read before and is known to give this result, so
      # just skip over it.
      result, advance = cached
      source.pos = start + advance
      return result
    end

    result = obj.try(source, self, consume_all)
    set(obj, start, [result, source.pos - start]) if obj.cached?
    result
  end

  # Report an error at a given position.
  # @see ErrorReporter
  #
  def err_at(*args)
    @reporter ? [false, @reporter.err_at(*args)] : [false, nil]
  end

  # Report an error.
  # @see ErrorReporter
  #
  def err(*args)
    @reporter ? [false, @reporter.err(*args)] : [false, nil]
  end

  # Starts a new scope for the duration of the block. Use the #scope method
  # of Parslet::Atoms::DSL to call this.
  #
  def scope
    begin
      captures.push
      yield
    ensure
      captures.pop
    end
  end

  private

  def lookup(obj, pos)
    @cache[pos][obj]
  end

  def set(obj, pos, val)
    @cache[pos][obj] = val
  end
end
end

109
lib/parslet/atoms/dsl.rb Normal file
View File

@ -0,0 +1,109 @@
# A mixin module that defines operations that can be called on any subclass
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
# allow combination of parslet atoms to form bigger parsers.
#
# Example:
#
# str('foo') >> str('bar')
# str('f').repeat
# any.absent? # also called The Epsilon
#
module Parslet::Atoms::DSL
  # Builds an atom that repeats the current atom at least min and at most
  # max times. A max of nil means that there is no upper bound.
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Builds an atom that is only maybe present in the input: a repetition
  # with min 0 and max 1 that is tagged :maybe. Generated tree value will be
  # either nil (if atom is not present in the input) or the matched subtree.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Chains this atom and +parslet+ together as a sequence.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Chains this atom and +parslet+ together as an alternation. The left side
  # is always attempted first; the right side is tried if it doesn't match.
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Tests for absence of a parslet atom in the input stream without
  # consuming it.
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Tests for presence of a parslet atom in the input stream without
  # consuming it.
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated spellings of present? and absent? that will disappear in 2.0.
  #
  alias_method :prsnt?, :present?
  alias_method :absnt?, :absent?

  # Marks a parslet atom as important for the tree output. This must be used
  # to achieve meaningful output from the #parse method.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Captures a part of the input and stores it under the name given. This is
  # very useful to create self-referential parses. A capture stores the
  # result of its parse (may be complex) on a successful parse action.
  #
  # Example:
  #   str('a').capture(:b) # will store captures[:b] == 'a'
  #
  def capture(name)
    Parslet::Atoms::Capture.new(self, name)
  end
end

View File

@ -0,0 +1,32 @@
# Evaluates a block at parse time. The block is called with the source and
# the parse context and must return a parser (something which implements
# #apply). That parser is then applied to the input, creating the result.
#
# Dynamic parses are never cached.
#
# Example:
#   dynamic { rand < 0.5 ? str('a') : str('b') }
#
class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
  attr_reader :block

  # @param block [#call] callable that receives (source, context) and
  #   returns the atom to apply
  def initialize(block)
    # Run the base class initialisation like every other atom does.
    super()

    @block = block
  end

  # The outcome depends on what the block decides at parse time, so results
  # must never be served from the cache.
  def cached?
    false
  end

  # Asks the block for an atom and applies that atom to the input.
  #
  # @return the result of applying the atom produced by the block
  def try(source, context, consume_all)
    result = block.call(source, context)

    # Result is a parslet atom.
    return result.apply(source, context, consume_all)
  end

  def to_s_inner(prec)
    "dynamic { ... }"
  end
end

View File

@ -0,0 +1,41 @@
# This wraps pieces of parslet definition and gives them a name. The wrapped
# piece is lazily evaluated and cached. This has two purposes:
#
# * Avoid infinite recursion during evaluation of the definition
# * Be able to print things by their name, not by their sometimes
# complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet.rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
attr_reader :name, :block
# @param name name of the rule this entity stands for
# @param block block that produces the wrapped parslet when called
def initialize(name, &block)
super()
@name = name
@block = block
end
# Applies the wrapped parslet (evaluating the block on first use).
def try(source, context, consume_all)
parslet.apply(source, context, consume_all)
end
# Returns the wrapped parslet. The block is called the first time this is
# needed and its result is memoized in @parslet. A block that returns a
# falsy value raises NotImplementedError (see #raise_not_implemented).
def parslet
@parslet ||= @block.call.tap { |p|
raise_not_implemented unless p
}
end
# Entities print as their upper-cased name rather than their content.
def to_s_inner(prec)
name.to_s.upcase
end
private
# Raises NotImplementedError for a rule block that returned nil. Frames
# that originate from this file are removed from the backtrace that is
# attached to the exception.
def raise_not_implemented
trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
exception.set_backtrace(trace)
raise exception
end
end

121
lib/parslet/atoms/infix.rb Normal file
View File

@ -0,0 +1,121 @@
# Parses infix expressions made of +element+ atoms separated by operators,
# honouring operator precedence and associativity.
#
# +operations+ is a list of triples <code>[atom, precedence,
# associativity]</code>; an associativity of <code>:left</code> makes the
# operator left associative, anything else is treated as right associative.
#
class Parslet::Atoms::Infix < Parslet::Atoms::Base
  attr_reader :element, :operations

  # @param element atom that matches a single operand
  # @param operations [Array<Array>] operator triples, see class comment
  def initialize(element, operations)
    super()

    @element = element
    @operations = operations
  end

  # Parses one complete infix expression. Errors raised deep inside the
  # recursion arrive here via throw/catch (see #abort) and are returned as
  # the result of this method.
  def try(source, context, consume_all)
    return catch_error {
      return succ(
        produce_tree(
          precedence_climb(source, context, consume_all)))
    }
  end

  # Turns an array of the form ['1', '+', ['2', '*', '3']] into a hash that
  # reflects the same structure.
  #
  # Anything that is not an Array is returned unchanged, which is why the
  # right hand side can be passed through #produce_tree unconditionally.
  #
  # @note The array passed in is consumed (emptied) by this method.
  #
  def produce_tree(ary)
    return ary unless ary.kind_of? Array

    left = ary.shift

    until ary.empty?
      op, right = ary.shift(2)

      # Subexpressions (arrays) become sub-hashes, plain operands stay as
      # they are.
      left = {l: left, o: op, r: produce_tree(right)}
    end

    left
  end

  # A precedence climbing algorithm married to parslet, as described here
  # http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
  #
  # @note Error handling in this routine is done by throwing :error and
  #   as a value the error to return to parslet. This avoids cluttering
  #   the recursion logic here with parslet error handling.
  #
  # @param current_prec [Integer] operators with a lower precedence than this
  #   end the current (sub)expression
  # @param needs_element [Boolean] currently not evaluated by this method
  # @return a single operand, or an array of the form
  #   [operand, operator, operand_or_array, ...]
  #
  def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
    result = []

    # To even begin parsing an arithmetic expression, there needs to be
    # at least one @element.
    success, value = @element.apply(source, context, false)

    unless success
      abort context.err(self, source, "#{@element.inspect} was expected", [value])
    end

    result << flatten(value, true)

    # Loop until we fail on operator matching or until input runs out.
    loop do
      op_pos = source.pos
      op_match, prec, assoc = match_operation(source, context, false)

      # If no operator could be matched here, one of several cases
      # applies:
      #
      # - end of file
      # - end of expression
      # - syntax error
      #
      # We abort matching the expression here.
      break unless op_match

      if prec >= current_prec
        next_prec = (assoc == :left) ? prec+1 : prec
        result << op_match
        result << precedence_climb(
          source, context, consume_all, next_prec, true)
      else
        # The operator binds less tightly than the expression being built:
        # un-read it and let the caller deal with it.
        source.pos = op_pos
        return unwrap(result)
      end
    end

    return unwrap(result)
  end

  # Returns the single element of a one element array, the array otherwise.
  def unwrap expr
    expr.size == 1 ? expr.first : expr
  end

  # Tries the operator atoms in the order they were given.
  #
  # @return [Array, nil] flattened match, precedence and associativity of
  #   the first operator that matches; nil when none does
  def match_operation(source, context, consume_all)
    @operations.each do |op_atom, prec, assoc|
      success, value = op_atom.apply(source, context, consume_all)
      return flatten(value, true), prec, assoc if success
    end

    nil
  end

  # Leaves the recursion immediately, making +error+ the result of #try.
  def abort(error)
    throw :error, error
  end

  # Runs the block and returns its value, or the value thrown by #abort.
  def catch_error
    catch(:error) { yield }
  end

  def to_s_inner(prec)
    ops = @operations.map { |o, _, _| o.inspect }.join(', ')
    "infix_expression(#{@element.inspect}, [#{ops}])"
  end
end

View File

@ -0,0 +1,49 @@
# Positive or negative lookahead: checks whether the bound parslet matches at
# the current position, but never consumes any input.
#
# Example:
#
#   str('foo').present? # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # @param bound_parslet atom to look ahead for
  # @param positive truthy for positive lookahead, falsy for negative
  def initialize(bound_parslet, positive=true)
    super()

    # A single flag tells positive from negative lookahead.
    @positive = positive
    @bound_parslet = bound_parslet
    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Succeeds (with a nil value) when the bound parslet's outcome is what this
  # lookahead asks for; reports an error at the starting position otherwise.
  def try(source, context, consume_all)
    start = source.pos

    matched, _ = bound_parslet.apply(source, context, consume_all)
    fulfilled = positive ? matched : !matched
    return succ(nil) if fulfilled

    context.err_at(
      self, source, @error_msgs[positive ? :positive : :negative], start)
  ensure
    # Lookaheads rewind the input on every exit path, success included.
    source.pos = start
  end

  precedence LOOKAHEAD

  def to_s_inner(prec)
    "#{positive ? '&' : '!'}#{bound_parslet.to_s(prec)}"
  end
end

View File

@ -0,0 +1,32 @@
# Gives a match a name, which shapes the tree that parsing produces.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet atom whose result gets named
  # @param name key used in the resulting hash
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped atom. Failures are passed through untouched; a
  # successful value is flattened and wrapped into { name => value }.
  def apply(source, context, consume_all)
    outcome = parslet.apply(source, context, consume_all)
    matched, value = outcome
    return outcome unless matched

    succ(produce_return_value(value))
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  private

  def produce_return_value(val)
    flat = flatten(val, true)
    { name => flat }
  end
end

38
lib/parslet/atoms/re.rb Normal file
View File

@ -0,0 +1,38 @@
# Matches a restricted kind of regular expression: one that only ever
# matches a single character at a time. Useful members of this family are:
# <code>character ranges, \\w, \\d, \\r, \\n, ...</code>
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re

  # @param match pattern source; converted with #to_s and compiled with
  #   Regexp::MULTILINE
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(@match, Regexp::MULTILINE)
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Failed to match #{match.inspect[1..-2]}"
    }
  end

  # Consumes exactly one character when the pattern matches at the current
  # position. Otherwise reports either premature end of input (nothing left
  # to read) or a plain mismatch.
  def try(source, context, consume_all)
    if source.matches?(@re)
      succ(source.consume(1))
    elsif source.chars_left < 1
      context.err(self, source, @error_msgs[:premature])
    else
      context.err(self, source, @error_msgs[:failed])
    end
  end

  def to_s_inner(prec)
    quoted = match.inspect
    quoted[1..-2]
  end
end

View File

@ -0,0 +1,83 @@
# Applies a parslet over and over again.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # @param parslet atom to repeat
  # @param min minimum number of matches required
  # @param max maximum number of matches, nil for unlimited
  # @param tag first element of the result array (used for flattening)
  # @raise [ArgumentError] when max is 0
  def initialize(parslet, min, max, tag=:repetition)
    super()

    if max == 0
      raise ArgumentError,
        "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})"
    end

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}",
      :unconsumed => "Extra input after last repetition"
    }
  end

  # Matches the wrapped parslet as often as possible (up to max). The result
  # array starts with the tag, followed by one value per successful match.
  def try(source, context, consume_all)
    origin = source.pos
    matches = [@tag] # the tag goes first (for flattening)
    hits = 0
    last_failure = nil

    loop do
      ok, outcome = parslet.apply(source, context, false)

      unless ok
        # Remember why the final attempt failed; it is reported below.
        last_failure = outcome
        break
      end

      hits += 1
      matches << outcome

      # With an upper bound we stop as soon as it is reached.
      return succ(matches) if max && hits >= max
    end

    # The greedy loop ended on a failure. Were there enough matches?
    if hits < min
      return context.err_at(
        self,
        source,
        @error_msgs[:minrep],
        origin,
        [last_failure])
    end

    # consume_all means this part of the parser has to eat the whole input.
    # Stopping with input left over therefore is an error, and reporting it
    # here keeps the failure reason of the last attempt, which tends to be
    # the interesting bit of information.
    if consume_all && source.chars_left > 0
      return context.err(
        self,
        source,
        @error_msgs[:unconsumed],
        [last_failure])
    end

    succ(matches)
  end

  precedence REPETITION

  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"
    parslet.to_s(prec) + suffix
  end
end

View File

@ -0,0 +1,26 @@
# Opens a new scope for the duration of a sub-parse. Please also see the
# #captures method.
#
class Parslet::Atoms::Scope < Parslet::Atoms::Base
  attr_reader :block

  # @param block [#call] callable returning the atom to parse inside the
  #   new scope
  def initialize(block)
    super()

    @block = block
  end

  # Results of scoped parses are not cached.
  def cached?
    false
  end

  # Asks the context for a new scope and applies the atom produced by the
  # block inside of it.
  def apply(source, context, consume_all)
    context.scope {
      return block.call.apply(source, context, consume_all)
    }
  end

  def to_s_inner(prec)
    inner = block.call.to_s(prec)
    "scope { #{inner} }"
  end
end

View File

@ -0,0 +1,45 @@
# Several parslets that have to match one after the other, left to right.
# Written as '>>'.
#
# Example:
#
#   str('a') >> str('b')  # matches 'a', then 'b'
#
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  # @param parslets atoms that make up the sequence, in order
  def initialize(*parslets)
    super()

    @parslets = parslets
    @error_msgs = {
      :failed => "Failed to match sequence (#{self.inspect})"
    }
  end

  # Appending to a sequence yields one flat, longer sequence instead of a
  # nested one.
  def >>(parslet)
    self.class.new(*@parslets, parslet)
  end

  # Applies every member in order and stops at the first failure. Only the
  # last member inherits consume_all. The result array starts with
  # :sequence, followed by one value per member.
  def try(source, context, consume_all)
    last = parslets.size - 1

    # One slot per member plus one for the leading tag.
    tree = Array.new(parslets.size + 1)
    tree[0] = :sequence

    parslets.each_with_index do |atom, i|
      ok, outcome = atom.apply(source, context, consume_all && (i == last))
      return context.err(self, source, @error_msgs[:failed], [outcome]) unless ok

      tree[i + 1] = outcome
    end

    succ(tree)
  end

  precedence SEQUENCE

  def to_s_inner(prec)
    rendered = parslets.map { |atom| atom.to_s(prec) }
    rendered.join(' ')
  end
end

39
lib/parslet/atoms/str.rb Normal file
View File

@ -0,0 +1,39 @@
# Matches a string of characters.
#
# Example:
#
#   str('foo') # matches 'foo'
#
class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str

  # @param str text to match; anything that is not a String is converted
  #   with #to_s first
  def initialize(str)
    super()

    @str = str.to_s
    # Pattern and length are derived from the converted string, so they
    # always describe the same text as @str. Deriving them from the raw
    # argument breaks for non-String input (Regexp.escape rejects e.g.
    # Integers and Integer#size is a byte size, not a character count).
    @pat = Regexp.new(Regexp.escape(@str))
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => "Expected #{str.inspect}, but got "
    }
  end

  # Consumes the string when the input continues with it. Otherwise reports
  # either premature end of input or what was found in its place.
  def try(source, context, consume_all)
    return succ(source.consume(@len)) if source.matches?(@pat)

    # Input ending early:
    return context.err(self, source, @error_msgs[:premature]) \
      if source.chars_left<@len

    # Expected something, but got something else instead. The offending
    # input is consumed for the message, hence the position is saved first.
    error_pos = source.pos
    return context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  def to_s_inner(prec)
    "'#{str}'"
  end
end

View File

@ -0,0 +1,89 @@
# Augments all parslet atoms with an accept method that will call back
# to the visitor given.
#
# Each atom class calls the visitor method named after it and passes its
# own parts as arguments; the return value of the visitor method is the
# return value of #accept.
#
module Parslet::Atoms
class Base
# Fallback for atoms that do not define their own #accept.
#
# @raise [NotImplementedError] always
def accept(visitor)
raise NotImplementedError, "No #accept method on #{self.class.name}."
end
end
class Str
# Call back visitors #visit_str method. See parslet/export for an example.
#
def accept(visitor)
visitor.visit_str(str)
end
end
class Entity
# Call back visitors #visit_entity method. See parslet/export for an
# example.
#
# The block is passed on uncalled; evaluating it is left to the visitor.
#
def accept(visitor)
visitor.visit_entity(name, block)
end
end
class Named
# Call back visitors #visit_named method. See parslet/export for an
# example.
#
def accept(visitor)
visitor.visit_named(name, parslet)
end
end
class Sequence
# Call back visitors #visit_sequence method. See parslet/export for an
# example.
#
def accept(visitor)
visitor.visit_sequence(parslets)
end
end
class Repetition
# Call back visitors #visit_repetition method. See parslet/export for an
# example.
#
def accept(visitor)
visitor.visit_repetition(@tag, min, max, parslet)
end
end
class Alternative
# Call back visitors #visit_alternative method. See parslet/export for an
# example.
#
def accept(visitor)
visitor.visit_alternative(alternatives)
end
end
class Lookahead
# Call back visitors #visit_lookahead method. See parslet/export for an
# example.
#
def accept(visitor)
visitor.visit_lookahead(positive, bound_parslet)
end
end
class Re
# Call back visitors #visit_re method. See parslet/export for an example.
#
def accept(visitor)
visitor.visit_re(match)
end
end
end
class Parslet::Parser
# Call back visitors #visit_parser method, passing the parser's root atom.
#
# @return whatever the visitor's #visit_parser returns
#
def accept(visitor)
visitor.visit_parser(root)
end
end

94
lib/parslet/cause.rb Normal file
View File

@ -0,0 +1,94 @@
module Parslet
  # Represents a cause why a parse did fail. A lot of these objects are
  # constructed - not all of the causes turn out to be failures for the whole
  # parse.
  #
  class Cause
    # @param message [String, Array] message or list of message pieces
    # @param source [Parslet::Source] source that was being parsed
    # @param pos [Fixnum] character offset of the error
    # @param children [Array<Parslet::Cause>, nil] causes of this cause
    def initialize(message, source, pos, children)
      @message, @source, @pos, @children =
        message, source, pos, children
    end

    # @return [String, Array] A string or an array of message pieces that
    #   provide failure information. Use #to_s to get a formatted string.
    attr_reader :message

    # @return [Parslet::Source] Source that was parsed when this error
    #   happened. Mainly used for line number information.
    attr_reader :source

    # Location of the error.
    #
    # @return [Fixnum] Position where the error happened. (character offset)
    attr_reader :pos

    # When this cause is part of a tree of error causes: child nodes for this
    # node. Very often carries the reasons for this cause.
    #
    # @return [Array<Parslet::Cause>] A list of reasons for this cause;
    #   never nil (a missing list is replaced by an empty array).
    def children
      @children ||= []
    end

    # Builds a cause from its parts. The 'at line LINE char CHAR' suffix is
    # only added once the returned object is turned into a string using
    # #to_s. Use +pos+ to override the position of the +source+.
    #
    # @param source [Parslet::Source] source that was parsed when this error
    #   happened
    # @param pos [Fixnum] position of error
    # @param str [String, Array<String>] message parts
    # @param children [Array<Parslet::Cause>] child nodes for this error tree
    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
    #
    def self.format(source, pos, str, children=[])
      self.new(str, source, pos, children)
    end

    # @return [String] the message followed by ' at line L char C.'
    def to_s
      line, column = source.line_and_column(pos)
      # Allow message to be a list of objects. Join them here, since we now
      # really need it. Pieces that respond to #to_slice are shown as the
      # inspected version of their #str.
      Array(message).map { |o|
        o.respond_to?(:to_slice) ?
          o.str.inspect :
          o.to_s }.join + " at line #{line} char #{column}."
    end

    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages.
    #
    # @param exception_klass [Class] exception class; instantiated with the
    #   formatted message and this cause
    #
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      Kernel.raise exception
    end

    # Returns an ascii tree representation of the causes of this node and its
    # children.
    #
    # @return [String] one line per cause, children indented below parents
    #
    def ascii_tree
      StringIO.new.tap { |io|
        recursive_ascii_tree(self, io, [true]) }.
        string
    end

    private

    # Prints +node+ and, depth first, all of its children. +curved+ holds one
    # flag per ancestor level (plus the node itself) telling whether the node
    # on that level is the last child of its parent.
    def recursive_ascii_tree(node, stream, curved)
      append_prefix(stream, curved)
      stream.puts node.to_s

      # Decide 'last child' by position, so that a child that occurs twice
      # in the list is not mistaken for the last one.
      last_index = node.children.size - 1
      node.children.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end

    # Prints the indentation for one line of the tree. Every level is three
    # characters wide - the same width as the branch markers "`- " and
    # "|- " - so that nested levels line up below each other.
    def append_prefix(stream, curved)
      return if curved.size < 2

      curved[1..-2].each do |c|
        stream.print(c ? "   " : "|  ")
      end
      stream.print(curved.last ? "`- " : "|- ")
    end
  end
end

33
lib/parslet/context.rb Normal file
View File

@ -0,0 +1,33 @@
require 'blankslate'
# Provides a context for tree transformations to run in. The context allows
# accessing each of the bindings in the bindings hash as local method.
#
# Example:
#
# ctx = Context.new(:a => :b)
# ctx.instance_eval do
# a # => :b
# end
#
# @api private
class Parslet::Context < BlankSlate
# NOTE(review): presumably +reveal+ re-enables the named method that
# BlankSlate hides by default - confirm against the blankslate gem.
reveal :methods
reveal :respond_to?
reveal :inspect
reveal :to_s
reveal :instance_variable_set
# Defines a method called +name+ with the given body on this one object only
# (on its singleton class), not on the class.
def meta_def(name, &body)
metaclass = class <<self; self; end
metaclass.send(:define_method, name, &body)
end
# Exposes every binding twice: as a singleton method named after the key and
# as an instance variable of the same name.
#
# @param bindings [Hash] name => value pairs to expose
def initialize(bindings)
bindings.each do |key, value|
meta_def(key.to_sym) { value }
instance_variable_set("@#{key}", value)
end
end
end

View File

@ -0,0 +1,33 @@
class Parslet::Atoms::Base
  # Wraps the common idiom
  #
  #   begin
  #     tree = parser.parse('something')
  #   rescue Parslet::ParseFailed => error
  #     puts error.cause.ascii_tree
  #   end
  #
  # into a convenient method. On success the parse result is returned; on
  # failure the error tree is printed and the method returns what +puts+
  # returns.
  #
  # Usage:
  #
  #   require 'parslet'
  #   require 'parslet/convenience'
  #
  #   class FooParser < Parslet::Parser
  #     rule(:foo) { str('foo') }
  #     root(:foo)
  #   end
  #
  #   FooParser.new.parse_with_debug('bar')
  #
  # @see Parslet::Atoms::Base#parse
  #
  def parse_with_debug(str, opts={})
    begin
      parse(str, opts)
    rescue Parslet::ParseFailed => failure
      puts failure.cause.ascii_tree
    end
  end
end

View File

@ -0,0 +1,7 @@
# A namespace for all error reporters. The module itself is empty; the
# reporter classes are loaded by the requires that follow it.
#
module Parslet::ErrorReporter
end
require 'parslet/error_reporter/tree'
require 'parslet/error_reporter/deepest'

View File

@ -0,0 +1,95 @@
module Parslet
  module ErrorReporter
    # Where {Tree} reports the latest error that happens, this reporter
    # reports the deepest one. Depth means: how far into the input did the
    # parse get before the error happened. Errors close to the greatest depth
    # tend to be more relevant to the end user, since they specify what could
    # be done to make them go away.
    #
    # As a consequence, errors produced by this reporter are not related to
    # the structure of the grammar at all. Their positions are advanced and
    # convey, at every grammar level, what the deepest rule was to fail.
    #
    class Deepest
      def initialize
        @deepest_cause = nil
      end

      # Builds the cause for an error at the current source position and
      # runs it through #deepest.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        deepest(Cause.format(source, source.pos, message, children))
      end

      # Builds the cause for an error at the given position and runs it
      # through #deepest.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        deepest(Cause.format(source, pos, message, children))
      end

      # The cause that currently counts as deepest. Mainly for specs.
      #
      attr_reader :deepest_cause

      # Compares the deepest leaf of the given cause with the deepest cause
      # seen so far. A leaf at the same or a later position becomes the new
      # reference and the cause is handed back as is; otherwise the stored
      # deepest cause is returned in place of the one given.
      #
      def deepest(cause)
        _rank, leaf = deepest_child(cause)
        known = deepest_cause

        return known if known && leaf.pos < known.pos

        # Reaches at least as deep into the input: this is the new reference.
        @deepest_cause = leaf
        cause
      end

      private

      # Finds the leaf with the biggest rank (nesting level) in an error
      # tree; among equally ranked leaves the first one wins.
      #
      # @return [Array] rank and cause of that leaf
      #
      def deepest_child(cause, rank=0)
        kids = cause.children
        return rank, cause if !kids || kids.empty?

        best_rank, best_leaf = rank, cause
        kids.each do |kid|
          kid_rank, kid_leaf = deepest_child(kid, rank + 1)
          next unless kid_rank > best_rank

          best_rank, best_leaf = kid_rank, kid_leaf
        end

        return best_rank, best_leaf
      end
    end
  end
end

View File

@ -0,0 +1,57 @@
module Parslet
  module ErrorReporter
    # An error reporter has two central methods: #err reports an error at the
    # current parse position, #err_at reports one at a given parse position.
    # What the reporter returns (a 'cause') is handed back to the caller
    # together with the information that the parse failed.
    #
    # On the outer levels of a parser, these methods receive a list of error
    # objects ('causes') from the inner levels. This default implementation
    # treats them as error subtrees and attaches them to the node generated
    # at each level, thereby constructing an error tree.
    #
    # The resulting tree mirrors the structure of the grammar that failed:
    # every error in the tree corresponds to exactly one parslet atom that
    # produced it.
    #
    # Implementors are free to use the return values as they see fit. One
    # example would be to return an error state object from these methods
    # that is then updated as errors cascade up the parse derivation tree.
    #
    class Tree
      # Creates the cause for an error at the current source position,
      # carrying the errors from the level below as children.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err(atom, source, message, children=nil)
        Cause.format(source, source.pos, message, children)
      end

      # Creates the cause for an error at the given position, carrying the
      # errors from the level below as children.
      #
      # @param atom [Parslet::Atoms::Base] parslet that failed
      # @param source [Source] Source that we're using for this parse. (line
      #   number information...)
      # @param message [String, Array] Error message at this level.
      # @param pos [Fixnum] The real position of the error.
      # @param children [Array] A list of errors from a deeper level (or nil).
      # @return [Cause] An error tree combining children with message.
      #
      def err_at(atom, source, message, pos, children=nil)
        Cause.format(source, pos, message, children)
      end
    end
  end
end

162
lib/parslet/export.rb Normal file
View File

@ -0,0 +1,162 @@
# Allows exporting parslet grammars to other lingos.
require 'set'
require 'parslet/atoms/visitor'
class Parslet::Parser
  module Visitors
    # Visitor that renders parslet atoms in the Citrus dialect. Every visit
    # method returns a newly built string and leaves the strings handed in
    # by the atoms (or returned by nested visits) untouched.
    class Citrus
      attr_reader :context, :output

      # @param context object that answers #deferred and #mangle_name (the
      #   PrettyPrinter)
      def initialize(context)
        @context = context
      end

      def visit_str(str)
        "\"#{str.inspect[1..-2]}\""
      end

      # NOTE: for a String this returns the very object that the Re atom
      # holds, so callers must not modify the result in place.
      def visit_re(match)
        match.to_s
      end

      # Rules are not expanded inline: the rule is queued for printing and
      # only a reference to its (mangled) name is rendered here.
      def visit_entity(name, block)
        context.deferred(name, block)

        "(#{context.mangle_name(name)})"
      end

      # Names have no equivalent in the exported grammar and are dropped.
      def visit_named(name, parslet)
        parslet.accept(self)
      end

      def visit_sequence(parslets)
        members = parslets.map { |el| el.accept(self) }
        "(#{members.join(' ')})"
      end

      # Builds a new string instead of appending to the rendering of the
      # repeated atom; appending in place would alter the match string of a
      # Re atom, since #visit_re hands out that string itself.
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}*#{max}"
      end

      def visit_alternative(alternatives)
        members = alternatives.map { |el| el.accept(self) }
        "(#{members.join(' | ')})"
      end

      def visit_lookahead(positive, bound_parslet)
        "#{positive ? '&' : '!'}#{bound_parslet.accept(self)}"
      end
    end

    # Visitor for the Treetop dialect; differs from Citrus in how
    # repetitions and alternatives are written.
    class Treetop < Citrus
      def visit_repetition(tag, min, max, parslet)
        "#{parslet.accept(self)}#{min}..#{max}"
      end

      def visit_alternative(alternatives)
        members = alternatives.map { |el| el.accept(self) }
        "(#{members.join(' / ')})"
      end
    end
  end

  # A helper class that formats Citrus and Treetop grammars as a string.
  #
  class PrettyPrinter
    attr_reader :visitor

    # @param visitor_klass [Class] visitor class to render atoms with; it
    #   is instantiated with this printer as its context
    def initialize(visitor_klass)
      # Rules queued by the visitor that still need printing. Starts out
      # empty so that grammars without any rule reference print as well.
      @todo = []
      @visitor = visitor_klass.new(self)
    end

    # Pretty prints the given parslet using the visitor that has been
    # configured in initialize. Returns the string representation of the
    # Citrus or Treetop grammar.
    #
    # @param name name of the grammar
    # @param parslet atom that is printed as rule 'root'
    # @return [String]
    #
    def pretty_print(name, parslet)
      sections = ["grammar #{name}\n", rule('root', parslet)]

      seen = Set.new
      # @todo is constantly filled by the visitor (see #deferred). We
      # keep going until it is empty.
      until @todo.empty?
        rule_name, block = @todo.shift

        # Track what rules we've already seen. This breaks loops.
        next unless seen.add?(rule_name)

        sections << rule(rule_name, block.call)
      end

      sections << "end\n"
      sections.join
    end

    # Formats a rule in either dialect.
    #
    def rule(name, parslet)
      " rule #{mangle_name name}\n #{parslet.accept(visitor)}\n end\n"
    end

    # Whenever the visitor encounters a rule in a parslet, it defers the
    # pretty printing of the rule by calling this method.
    #
    def deferred(name, content)
      @todo ||= []
      @todo << [name, content]
    end

    # Mangles names so that Citrus and Treetop can live with it. This mostly
    # transforms some of the things that Ruby allows into other patterns. If
    # there is collision, we will not detect it for now.
    #
    # Currently a trailing '?' is replaced by '_p'.
    #
    def mangle_name(str)
      str.to_s.sub(/\?$/, '_p')
    end
  end

  # Exports the current parser instance as a string in the Citrus dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_citrus # => a citrus grammar as a string
  #
  def to_citrus
    PrettyPrinter.new(Visitors::Citrus).
      pretty_print(self.class.name, root)
  end

  # Exports the current parser instance as a string in the Treetop dialect.
  #
  # Example:
  #
  #   require 'parslet/export'
  #   class MyParser < Parslet::Parser
  #     root(:expression)
  #     rule(:expression) { str('foo') }
  #   end
  #
  #   MyParser.new.to_treetop # => a treetop grammar as a string
  #
  def to_treetop
    PrettyPrinter.new(Visitors::Treetop).
      pretty_print(self.class.name, root)
  end
end

51
lib/parslet/expression.rb Normal file
View File

@ -0,0 +1,51 @@
# Lets you write rules as strings, using the exact same grammar that treetop
# uses (minus the actions). On one hand this is a good example of a fully
# fledged parser, on the other hand it might even turn out really useful.
#
# Think of it as an extension to parslet that might even be hosted in its
# own gem one fine day.
#
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Creates a parslet from a foreign language expression. The string is
  # parsed and transformed right away; fetch the result with #to_parslet.
  #
  # Example:
  #
  #   Parslet::Expression.new("'a' 'b'")
  #
  def initialize(str, opts={}, context=self)
    @type = opts[:type] || :treetop
    @exp = str

    tree = parse(str)
    @parslet = transform(tree)
  end

  # Turns the parse tree into a parslet expression. When that fails, the
  # offending tree is written as a warning and the error is raised again.
  #
  def transform(tree)
    begin
      Treetop::Transform.new.apply(tree)
    rescue
      warn "Could not transform: " + tree.inspect
      raise
    end
  end

  # Parses the string with the treetop expression parser.
  #
  # @return the parse tree
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # The parslet this expression was turned into.
  #
  def to_parslet
    @parslet
  end
end

View File

@ -0,0 +1,92 @@
# Parser and transformation for treetop-style expression strings such as
# "'a' 'b' / [0-9]+". Parser turns the string into an intermediary tree,
# Transform turns that tree into parslet atoms.
class Parslet::Expression::Treetop
# Grammar of the expression language. Alternatives inside a rule are tried
# in the order written, so their order is significant.
class Parser < Parslet::Parser
root(:expression)
rule(:expression) { alternatives }
# alternative 'a' / 'b'
rule(:alternatives) {
(simple >> (spaced('/') >> simple).repeat).as(:alt)
}
# sequence by simple concatenation 'a' 'b'
rule(:simple) { occurrence.repeat(1).as(:seq) }
# occurrence modifiers: '*', '+', '{min, max}' and '?' after an atom; a
# bare atom is the last alternative
rule(:occurrence) {
atom.as(:repetition) >> spaced('*').as(:sign) |
atom.as(:repetition) >> spaced('+').as(:sign) |
atom.as(:repetition) >> repetition_spec |
atom.as(:maybe) >> spaced('?') |
atom
}
# the smallest units: a parenthesized expression, '.', a quoted string or
# a character class
rule(:atom) {
spaced('(') >> expression.as(:unwrap) >> spaced(')') |
dot |
string |
char_class
}
# a character class; a backslash escapes the character that follows it
rule(:char_class) {
(str('[') >>
(str('\\') >> any |
str(']').absent? >> any).repeat(1) >>
str(']')).as(:match) >> space?
}
# anything at all
rule(:dot) { spaced('.').as(:any) }
# recognizing strings: single quoted, a backslash escapes the character
# that follows it
rule(:string) {
str('\'') >>
(
(str('\\') >> any) |
(str("'").absent? >> any)
).repeat.as(:string) >>
str('\'') >> space?
}
# repetition specification like {1, 2}; both numbers are optional
rule(:repetition_spec) {
spaced('{') >>
integer.maybe.as(:min) >> spaced(',') >>
integer.maybe.as(:max) >> spaced('}')
}
rule(:integer) {
match['0-9'].repeat(1)
}
# whitespace handling
# NOTE(review): "\s" inside a double quoted Ruby string is a literal space
# character, so this rule matches spaces only (no tabs or newlines) -
# confirm that this is intended.
rule(:space) { match("\s").repeat(1) }
rule(:space?) { space.maybe }
# Matches the given string followed by optional whitespace.
def spaced(str)
str(str) >> space?
end
end
# Turns the intermediary tree produced by Parser into parslet atoms.
class Transform < Parslet::Transform
# '*' and '+': unbounded repetition with a minimum of 0 or 1
rule(:repetition => simple(:rep), :sign => simple(:sign)) {
min = sign=='+' ? 1 : 0
Parslet::Atoms::Repetition.new(rep, min, nil) }
# '{min, max}': a missing min counts as 0, a missing max as unbounded
rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) {
Parslet::Atoms::Repetition.new(rep,
Integer(min || 0),
max && Integer(max) || nil) }
rule(:alt => subtree(:alt)) { Parslet::Atoms::Alternative.new(*alt) }
rule(:seq => sequence(:s)) { Parslet::Atoms::Sequence.new(*s) }
rule(:unwrap => simple(:u)) { u }
rule(:maybe => simple(:m)) { |d| d[:m].maybe }
rule(:string => simple(:s)) { Parslet::Atoms::Str.new(s) }
rule(:match => simple(:m)) { Parslet::Atoms::Re.new(m) }
rule(:any => simple(:a)) { Parslet::Atoms::Re.new('.') }
end
end

97
lib/parslet/graphviz.rb Normal file
View File

@ -0,0 +1,97 @@
# Paints a graphviz graph of your parser.
#
# Needs the 'ruby-graphviz' gem. When it cannot be loaded, a hint is printed
# to standard output and the LoadError is raised again (a bare +fail+ inside
# a rescue clause re-raises the exception being handled).
begin
require 'ruby-graphviz'
rescue LoadError
puts "Please install the 'ruby-graphviz' gem first."
fail
end
require 'set'
require 'parslet/atoms/visitor'
module Parslet
# Visitor that walks the atoms of a parser and records every entity it
# meets as a node of the given graph, linking each entity to the node that
# was current when it was reached.
class GraphvizVisitor
  # The graph node that newly discovered children get linked to.
  attr_reader :parent

  # @param g graph object; #add_nodes and #add_edges are called on it
  def initialize(g)
    @graph = g
    @known_links = Set.new
    @visited = Set.new
  end

  # Entry point: creates the 'parser' node and descends into +root+.
  def visit_parser(root)
    recurse(root, node('parser'))
  end

  # Adds a node for the entity, links it to the current parent and, the
  # first time this name is seen, descends into the result of +block+.
  def visit_entity(name, block)
    entity_node = node(name)
    downwards(entity_node)

    # Set#add? returns nil when the name was already recorded, which stops
    # the recursion for entities that have been expanded before.
    return unless @visited.add?(name)

    recurse(block.call, entity_node)
  end

  # The following atoms produce no node of their own; their content is
  # attached to the current parent.
  def visit_named(name, atom)
    recurse(atom, parent)
  end

  def visit_repetition(tag, min, max, atom)
    recurse(atom, parent)
  end

  def visit_lookahead(positive, atom)
    recurse(atom, parent)
  end

  # Every alternative hangs off the same parent; the parent is captured
  # first because recursing changes @parent.
  def visit_alternative(alternatives)
    origin = parent
    alternatives.each { |atom| recurse(atom, origin) }
  end

  # Same as #visit_alternative, for the elements of a sequence.
  def visit_sequence(sequence)
    origin = parent
    sequence.each { |atom| recurse(atom, origin) }
  end

  # Regexp leaves are currently not drawn.
  def visit_re(regexp)
    # downwards node(regexp.object_id, label: escape("re(#{regexp.inspect})"))
  end

  # String leaves are currently not drawn.
  def visit_str(str)
    # downwards node(str.object_id, label: escape("#{str.inspect}"))
  end

  # Replaces double quotes with single quotes.
  def escape(str)
    str.tr('"', "'")
  end

  # Adds a node called +name+ (converted with #to_s) to the graph and
  # returns whatever the graph's #add_nodes returns.
  def node(name, opts = {})
    @graph.add_nodes(name.to_s, opts)
  end

  # Draws an edge from the current parent to +child+, once per pair.
  def downwards(child)
    return unless @parent

    link = [@parent, child]
    return if @known_links.include?(link)

    @graph.add_edges(@parent, child)
    @known_links << link
  end

  # Makes +current+ the parent and lets +node+ dispatch back to this
  # visitor.
  def recurse(node, current)
    @parent = current
    node.accept(self)
  end
end
# Adds a +graph+ method to whatever extends this module. The extending
# object must respond to +new+, and the result of +new+ to +accept+.
module Graphable
  # Builds a directed GraphViz graph, lets a GraphvizVisitor walk a fresh
  # instance and hands +opts+ to the graph's #output.
  #
  # @param opts options passed on unchanged to GraphViz#output
  # @return whatever GraphViz#output returns
  def graph(opts)
    digraph = GraphViz.new(:G, type: :digraph)
    walker = GraphvizVisitor.new(digraph)
    new.accept(walker)
    digraph.output(opts)
  end
end
# Reopens Parser inside the enclosing module and extends it with Graphable,
# which makes +graph+ callable on the class itself.
class Parser # reopen for introducing the .graph method
extend Graphable
end
end

67
lib/parslet/parser.rb Normal file
View File

@ -0,0 +1,67 @@
# The base class for all your parsers. Use as follows:
#
# require 'parslet'
#
# class MyParser < Parslet::Parser
# rule(:a) { str('a').repeat }
# root(:a)
# end
#
# pp MyParser.new.parse('aaaa') # => 'aaaa'
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
# # Don't know what to do with bbbb at line 1 char 1.
#
# Parslet::Parser is also a grammar atom. This means that you can mix full
# fledged parsers freely with small parts of a different parser.
#
# Example:
# class ParserA < Parslet::Parser
# root :aaa
# rule(:aaa) { str('a').repeat(3,3) }
# end
# class ParserB < Parslet::Parser
# root :expression
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
# end
#
# In the above example, ParserB would parse something like 'baaab'.
#
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  # Declares the rule that parsing starts from. If you have a rule for
  # 'file' that describes what should be in a file, this would be your
  # root declaration:
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # Technically this defines an instance method #root that returns the
  # result of calling the method +name+ on the parser instance. #try and
  # #to_s_inner below delegate to it.
  #
  # @param name [Symbol] name of the method that yields the root atom
  def self.root(name)
    define_method(:root) { send(name) }
  end

  # Forwards the parse attempt to the root atom.
  def try(source, context, consume_all)
    root.try(source, context, consume_all)
  end

  # The string form of a parser is the string form of its root atom.
  def to_s_inner(prec)
    root.to_s(prec)
  end
end

114
lib/parslet/pattern.rb Normal file
View File

@ -0,0 +1,114 @@
# Matches trees against expressions. Trees are formed by arrays and hashes
# for expressing membership and sequence. The leaves of the tree are other
# classes.
#
# A tree issued by the parslet library might look like this:
#
# {
# :function_call => {
# :name => 'foobar',
# :args => [1, 2, 3]
# }
# }
#
# A pattern that would match against this tree would be:
#
# { :function_call => { :name => simple(:name), :args => sequence(:args) }}
#
# Note that Parslet::Pattern only matches at a given subtree; it won't try
# to match recursively. To do that, please use Parslet::Transform.
#
class Parslet::Pattern
  # @param pattern the expression to match subtrees against; may be built
  #   from hashes, arrays, literal values and bind placeholders
  def initialize(pattern)
    @pattern = pattern
  end

  # Decides if the given subtree matches this pattern. Returns the bindings
  # made on a successful match or nil if the match fails. If you specify
  # bindings to be a hash, the mappings in it will be treated like bindings
  # made during an attempted match. The hash passed in is duplicated and
  # never modified.
  #
  #   Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
  #
  # @param subtree [String, Hash, Array] poro subtree returned by a parse
  # @param bindings [Hash] variable bindings to be verified
  # @return [Hash, nil] On success: variable bindings that allow a match. On
  #   failure: nil
  #
  def match(subtree, bindings=nil)
    bindings = bindings && bindings.dup || Hash.new
    return bindings if element_match(subtree, @pattern, bindings)
  end

  # Returns true if the tree element given by +tree+ matches the expression
  # given by +exp+. This match must respect bindings already made in
  # +bindings+. Note that bindings is carried along and modified.
  #
  # @api private
  #
  def element_match(tree, exp, bindings)
    # The comparison is on the exact classes: only a plain Hash is matched
    # structurally against a plain Hash, and likewise for Array.
    case [tree, exp].map { |e| e.class }
      when [Hash,Hash]
        return element_match_hash(tree, exp, bindings)
      when [Array,Array]
        return element_match_ary_single(tree, exp, bindings)
    else
      # If elements match exactly, then that is good enough in all cases
      return true if exp === tree

      # If exp is a bind variable: Check if the binding matches
      if exp.respond_to?(:can_bind?) && exp.can_bind?(tree)
        return element_match_binding(tree, exp, bindings)
      end

      # Otherwise: No match (we don't know anything about the element
      # combination)
      return false
    end
  end

  # Binds +tree+ to the variable named by +exp+, or - if that variable is
  # already bound - checks that the existing value equals +tree+.
  #
  # @api private
  #
  def element_match_binding(tree, exp, bindings)
    var_name = exp.variable_name

    # TODO test for the hidden :_ feature.
    # Test for the presence of the key, not the truthiness of its value:
    # a variable that is already bound to nil or false must still be
    # compared, otherwise it would be silently rebound to a different value.
    if var_name && bindings.has_key?(var_name)
      return bindings[var_name] == tree
    end

    # New binding:
    bindings.store var_name, tree

    return true
  end

  # Matches two arrays element by element; they must have the same size.
  #
  # @api private
  #
  def element_match_ary_single(sequence, exp, bindings)
    return false if sequence.size != exp.size

    return sequence.zip(exp).all? { |elt, subexp|
      element_match(elt, subexp, bindings) }
  end

  # Matches two hashes: same size, every key of +exp+ present in +tree+ and
  # every value matching.
  #
  # @api private
  #
  def element_match_hash(tree, exp, bindings)
    # Early failure when one hash is bigger than the other
    return false unless exp.size == tree.size

    # We iterate over expected pattern, since we demand that the keys that
    # are there should be in tree as well.
    exp.each do |expected_key, expected_value|
      return false unless tree.has_key? expected_key

      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)
    end

    return true
  end
end

View File

@ -0,0 +1,49 @@
# Used internally for representing a bind placeholder in a Parslet::Transform
# pattern. This is the superclass for all bindings.
#
# It defines the most permissive kind of bind, the one that matches any subtree
# whatever it looks like.
#
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # Name under which a matched subtree is stored in the bindings.
  def variable_name
    symbol
  end

  # Renders as <bind type>(<symbol>), where the bind type is derived from
  # the class name.
  def inspect
    "#{bind_type_name}(#{symbol.inspect})"
  end

  # Accepts every subtree.
  def can_bind?(subtree)
    true
  end

private

  # Lowercased part of the class name in front of 'Bind'.
  def bind_type_name
    prefix = self.class.name[/(\w+)Bind/, 1]

    # The fallback should never be needed, but since this is for inspection
    # only, let's not raise.
    prefix ? prefix.downcase : 'unknown_bind'
  end
end
# Binds a symbol to a simple subtree, one that is not either a sequence of
# elements or a collection of attributes.
#
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
  # Accepts anything whose class is neither exactly Hash nor exactly Array.
  def can_bind?(subtree)
    klass = subtree.class
    !(Hash == klass || Array == klass)
  end
end
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
#
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
  # Accepts arrays none of whose elements has the exact class Hash or
  # Array.
  def can_bind?(subtree)
    return false unless subtree.kind_of?(Array)

    subtree.none? { |el| [Hash, Array].include?(el.class) }
  end
end

59
lib/parslet/rig/rspec.rb Normal file
View File

@ -0,0 +1,59 @@
# Defines an RSpec matcher named +parse+. The matcher passes when the parser
# under test parses +input+ and, if +as+ was chained, when the result equals
# the expected output or satisfies the block given to +as+. If +opts+
# contains a truthy :trace entry, the error tree of a failed parse is
# appended to the failure message.
RSpec::Matchers.define(:parse) do |input, opts|
  # State shared by the blocks below: the expectation set through #as and
  # the outcome of the last parse attempt.
  as = block = nil
  result = trace = nil

  unless self.respond_to? :failure_message # if RSpec 2.x
    class << self
      alias_method :failure_message, :failure_message_for_should
      alias_method :failure_message_when_negated, :failure_message_for_should_not
    end
  end

  match do |parser|
    begin
      result = parser.parse(input)
      block ?
        block.call(result) :
        (as == result || as.nil?)
    rescue Parslet::ParseFailed => ex
      trace = ex.cause.ascii_tree if opts && opts[:trace]
      false
    end
  end

  failure_message do |is|
    if block
      "expected output of parsing #{input.inspect}" <<
      " with #{is.inspect} to meet block conditions, but it didn't"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
          "#{is.inspect} to be able to parse #{input.inspect}") <<
        (trace ?
          "\n"+trace :
          '')
    end
  end

  failure_message_when_negated do |is|
    if block
      "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
    else
      "expected " <<
        (as ?
          "output of parsing #{input.inspect}"<<
          " with #{is.inspect} not to equal #{as.inspect}" :
          "#{is.inspect} to not parse #{input.inspect}, but it did")
    end
  end

  # NOTE: This has a nodoc tag since the rdoc parser puts this into
  # Object, a thing I would never allow.
  #
  # The block parameter must not be called +block+: it would shadow the
  # outer local of that name, the assignment would only touch the parameter
  # and the block given to #as would be ignored. +expected_output+ defaults
  # to nil so that the block-only form <code>.as { |result| ... }</code>
  # can be used.
  chain :as do |expected_output = nil, &given_block|
    as = expected_output
    block = given_block
  end
end

42
lib/parslet/scope.rb Normal file
View File

@ -0,0 +1,42 @@
# A stack of nested bindings. Values are written to the innermost binding;
# lookups walk outwards through the enclosing bindings.
class Parslet::Scope
  # Raised when the accessed slot has never been assigned a value.
  #
  class NotFound < StandardError
  end

  # A single level of the scope: a hash of slots plus a link to the
  # enclosing level.
  class Binding
    attr_reader :parent

    def initialize(parent=nil)
      @parent = parent
      @hash = Hash.new
    end

    # Returns the value stored for +k+ in this binding or, failing that, in
    # the closest enclosing binding that has the slot.
    #
    # A slot counts as assigned as soon as it has been written, even when
    # the stored value is nil or false; such a value is returned as is and
    # shadows any value of the same name further out.
    #
    # @raise [NotFound] if no binding in the chain has the slot
    def [](k)
      return @hash[k] if @hash.has_key?(k)
      raise NotFound unless parent

      parent[k]
    end

    def []=(k,v)
      @hash.store(k,v)
    end
  end

  # Reads slot +k+, see Binding#[].
  def [](k)
    @current[k]
  end

  # Writes slot +k+ in the innermost binding.
  def []=(k,v)
    @current[k] = v
  end

  def initialize
    @current = Binding.new
  end

  # Opens a new innermost binding on top of the current one.
  def push
    @current = Binding.new(@current)
  end

  # Drops the innermost binding and makes its parent current again. The
  # outermost binding has no parent, so popping it leaves the scope without
  # a current binding.
  def pop
    @current = @current.parent
  end
end

101
lib/parslet/slice.rb Normal file
View File

@ -0,0 +1,101 @@
# A slice is a small part from the parse input. A slice mainly behaves like
# any other string, except that it remembers where it came from (offset in
# original input).
#
# == Extracting line and column
#
# Using the #line_and_column method, you can extract the line and column in
# the original input where this slice starts.
#
# Example:
# slice.line_and_column # => [1, 13]
# slice.offset # => 12
#
# == Likeness to strings
#
# Parslet::Slice behaves in many ways like a Ruby String. This likeness
# however is not complete - many of the myriad of operations String supports
# are not yet in Slice. You can always extract the internal string instance by
# calling #to_s.
#
# These omissions are somewhat intentional. Rather than maintaining a full
# delegation, we opt for a partial emulation that gets the job done.
#
class Parslet::Slice
  attr_reader :str, :offset
  attr_reader :line_cache

  # Construct a slice using a string, an offset and an optional line cache.
  # The line cache should be able to answer to the #line_and_column message.
  #
  def initialize(string, offset, line_cache=nil)
    @str = string
    @offset = offset
    @line_cache = line_cache
  end

  # Compares the wrapped string with +other+; the offset plays no part in
  # the comparison.
  #
  def ==(other)
    str == other
  end

  # Matches the wrapped string against +regexp+.
  #
  def match(regexp)
    str.match(regexp)
  end

  # Size of the wrapped string.
  #
  def size
    str.size
  end

  # Concatenate two slices; it is assumed that the second slice begins
  # where the first one ends. The result keeps the offset and the line
  # cache of this slice.
  #
  def +(other)
    self.class.new(str + other.to_s, offset, line_cache)
  end

  # Returns a <line, column> tuple referring to the original input.
  #
  # @raise [ArgumentError] if the slice was constructed without line cache
  #
  def line_and_column
    unless line_cache
      raise ArgumentError, "No line cache was given, cannot infer line and column."
    end

    line_cache.line_and_column(offset)
  end

  # Conversion operators -----------------------------------------------------

  # Returns the wrapped string itself.
  def to_str
    str
  end
  alias to_s to_str

  def to_slice
    self
  end

  def to_sym
    str.to_sym
  end

  # Strict conversion via Kernel#Integer.
  def to_int
    Integer(str)
  end

  def to_i
    str.to_i
  end

  def to_f
    str.to_f
  end

  # Inspection & Debugging ---------------------------------------------------

  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    str.inspect + "@#{offset}"
  end
end

87
lib/parslet/source.rb Normal file
View File

@ -0,0 +1,87 @@
require 'stringio'
require 'strscan'
require 'parslet/source/line_cache'
module Parslet
# Wraps the input string for parslet.
#
class Source
  # @param str [#to_str] the input to parse
  # @raise [ArgumentError] if +str+ does not respond to #to_str
  def initialize(str)
    unless str.respond_to?(:to_str)
      raise(
        ArgumentError,
        "Must construct Source with a string like object."
      )
    end

    @str = StringScanner.new(str)

    # maps 1 => /./m, 2 => /../m, etc...; filled lazily on first use
    @re_cache = Hash.new do |cache, count|
      cache[count] = /(.|$){#{count}}/m
    end

    @line_cache = LineCache.new
    @line_cache.scan_for_line_endings(0, str)
  end

  # Checks if the given pattern matches at the current input position. The
  # position is not advanced.
  #
  # @param pattern [Regexp] pattern to check for
  # @return the result of StringScanner#match?: truthy if the pattern
  #   matches at #pos, nil otherwise
  #
  def matches?(pattern)
    @str.match?(pattern)
  end
  alias match matches?

  # Consumes n characters from the input, returning them as a slice of the
  # input. The slice records the position the scan started from.
  #
  def consume(n)
    start = @str.pos
    scanned = @str.scan(@re_cache[n])

    Parslet::Slice.new(scanned, start, @line_cache)
  end

  # Returns how much input remains, as reported by StringScanner#rest_size.
  #
  def chars_left
    @str.rest_size
  end

  # Returns how many chars there are between current position and the
  # string given. If the string given doesn't occur in the source, then
  # the remaining chars (#chars_left) are returned.
  #
  # @return [Integer] count of chars until str or #chars_left
  #
  def chars_until(str)
    upto_match = @str.check_until(Regexp.new(Regexp.escape(str)))

    upto_match ? upto_match.size - str.size : chars_left
  end

  # Position of the parse as reported by the underlying StringScanner.
  # @note: Encodings...
  def pos
    @str.pos
  end

  # Moves the parse position. A position the scanner rejects with a
  # RangeError is ignored and the current position is kept.
  def pos=(n)
    begin
      @str.pos = n
    rescue RangeError
    end
  end

  # Returns a <line, column> tuple for the given position. If no position is
  # given, line/column information is returned for the current position
  # given by #pos.
  #
  def line_and_column(position=nil)
    @line_cache.line_and_column(position || pos)
  end
end
end

View File

@ -0,0 +1,96 @@
class Parslet::Source
# A cache for line start positions.
#
class LineCache
  def initialize
    # Stores line endings as a simple position number: each entry is the
    # position right after a newline. The first line always starts at 0;
    # numbers beyond the biggest entry are on any line > size, but probably
    # make a scan to that position necessary.
    @line_ends = []
    @line_ends.extend RangeSearch
  end

  # Returns a <line, column> tuple for the given input position. Both
  # numbers start counting at 1.
  #
  def line_and_column(pos)
    eol_idx = @line_ends.lbound(pos)

    if eol_idx.nil?
      # We're beyond the last line end that we know about. Pretend for now
      # that we're just on the last line.
      line_no    = @line_ends.size + 1
      line_start = @line_ends.last || 0
    else
      # eol_idx points to the offset that ends the current line; the entry
      # before it (if any) is where the line starts.
      line_no    = eol_idx + 1
      line_start = eol_idx > 0 ? @line_ends[eol_idx - 1] : 0
    end

    [line_no, pos - line_start + 1]
  end

  # Records the line endings found in +buf+, which is taken to start at
  # position +start_pos+ of the input.
  #
  def scan_for_line_endings(start_pos, buf)
    return unless buf

    scanner = StringScanner.new(buf)
    return unless scanner.exist?(/\n/)

    # If part of buf lies before the last line end recorded so far, that
    # part was scanned already: skip ahead so that it isn't recorded twice.
    already_scanned = @last_line_end && start_pos < @last_line_end
    scanner.pos = @last_line_end - start_pos if already_scanned

    # Store the position following each newline.
    while scanner.skip_until(/\n/)
      @last_line_end = start_pos + scanner.pos
      @line_ends << @last_line_end
    end
  end
end
# Mixin for arrays that implicitly give a number of ranges, where one range
# begins where the other one ends.
#
# Example:
#
# [10, 20, 30]
# # would describe [0, 10], (10, 20], (20, 30]
#
module RangeSearch
  # Midpoint of +left+ and +right+, rounded down.
  def find_mid(left, right)
    # NOTE: Jonathan Hinkle reported that when mathn is required, just
    # dividing and relying on the integer truncation is not enough.
    left + ((right - left) / 2).floor
  end

  # Scans the array for the first number that is > than bound. Returns the
  # index of that number, or nil if the array is empty or its last number
  # is not > than bound.
  #
  def lbound(bound)
    return nil if empty?
    return nil unless last > bound

    # Binary search; the last element is known to be > bound, so the
    # answer always lies within lo..hi.
    lo = 0
    hi = size - 1

    while lo < hi
      mid = find_mid(lo, hi)

      if self[mid] > bound
        hi = mid
      else
        # self[mid] <= bound, the answer is further right
        lo = mid + 1
      end
    end

    hi
  end
end
end

236
lib/parslet/transform.rb Normal file
View File

@ -0,0 +1,236 @@
require 'parslet/pattern'
# Transforms an expression tree into something else. The transformation
# performs a depth-first, post-order traversal of the expression tree. During
# that traversal, each time a rule matches a node, the node is replaced by the
# result of the block associated to the rule. Otherwise the node is accepted
# as is into the result tree.
#
# This is almost what you would generally do with a tree visitor, except that
# you can match several levels of the tree at once.
#
# As a consequence of this, the resulting tree will contain pieces of the
# original tree and new pieces. Most likely, you will want to transform the
# original tree wholly, so this isn't a problem.
#
# You will not be able to create a loop, given that each node will be replaced
# only once and then left alone. This means that the results of a replacement
# will not be acted upon.
#
# Example:
#
# class Example < Parslet::Transform
# rule(:string => simple(:x)) { # (1)
# StringLiteral.new(x)
# }
# end
#
# A tree transform (Parslet::Transform) is defined by a set of rules. Each
# rule can be defined by calling #rule with the pattern as argument. The block
# given will be called every time the rule matches somewhere in the tree given
# to #apply. It is passed a Hash containing all the variable bindings of this
# pattern match.
#
# In the above example, (1) illustrates a simple matching rule.
#
# Let's say you want to parse matching parentheses and distill a maximum nest
# depth. You would probably write a parser like the one in example/parens.rb;
# here's the relevant part:
#
# rule(:balanced) {
# str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
# }
#
# If you now apply this to a string like '(())', you get an intermediate parse
# tree that looks like this:
#
# {
# l: '(',
# m: {
# l: '(',
# m: nil,
# r: ')'
# },
# r: ')'
# }
#
# This parse tree is good for debugging, but what we would really like to have
# is just the nesting depth. This transformation rule will produce that:
#
# rule(:l => '(', :m => simple(:x), :r => ')') {
# # innermost :m will contain nil
# x.nil? ? 1 : x+1
# }
#
# = Usage patterns
#
# There are four ways of using this class. The first one is very much
# recommended, followed by the second one for generality. The other ones are
# omitted here.
#
# Recommended usage is as follows:
#
# class MyTransformator < Parslet::Transform
# rule(...) { ... }
# rule(...) { ... }
# # ...
# end
# MyTransformator.new.apply(tree)
#
# Alternatively, you can use the Transform class as follows:
#
# transform = Parslet::Transform.new do
# rule(...) { ... }
# end
# transform.apply(tree)
#
# = Execution context
#
# The execution context of action blocks differs depending on the arity of
# said blocks. This can be confusing. It is however somewhat intentional. You
# should not create fat Transform descendants containing a lot of helper methods,
# instead keep your AST class construction in global scope or make it available
# through a factory. The following piece of code illustrates usage of global
# scope:
#
# transform = Parslet::Transform.new do
# rule(...) { AstNode.new(a_variable) }
# rule(...) { Ast.node(a_variable) } # modules are nice
# end
# transform.apply(tree)
#
# And here's how you would use a class builder (a factory):
#
# transform = Parslet::Transform.new do
# rule(...) { builder.add_node(a_variable) }
# rule(...) { |d| d[:builder].add_node(d[:a_variable]) }
# end
# transform.apply(tree, :builder => Builder.new)
#
# As you can see, Transform allows you to inject local context for your rule
# action blocks to use.
#
class Parslet::Transform
  # FIXME: Maybe only part of it? Or maybe only include into constructor
  # context?
  include Parslet

  class << self
    # FIXME: Only do this for subclasses?
    include Parslet

    # Define a rule for the transform subclass. The rule is stored on the
    # class it is called on.
    #
    def rule(expression, &block)
      @__transform_rules ||= []
      @__transform_rules << [Parslet::Pattern.new(expression), block]
    end

    # Allows accessing the class' rules; empty if none were defined.
    #
    def rules
      @__transform_rules || []
    end
  end

  # An optional block is evaluated in the context of the new instance, so
  # that it can call #rule.
  def initialize(&block)
    @rules = []
    instance_eval(&block) if block
  end

  # Defines a rule to be applied whenever apply is called on a tree. A rule
  # is composed of two parts:
  #
  # * an *expression pattern*
  # * a *transformation block*
  #
  def rule(expression, &block)
    @rules << [Parslet::Pattern.new(expression), block]
  end

  # Applies the transformation to a tree that is generated by Parslet::Parser
  # or a simple parslet. Transformation will proceed down the tree, replacing
  # parts/all of it with new objects. The resulting object will be returned.
  #
  # Children are transformed before the node that contains them.
  #
  def apply(obj, context=nil)
    with_transformed_children =
      case obj
      when Hash  then recurse_hash(obj, context)
      when Array then recurse_array(obj, context)
      else obj
      end

    transform_elt(with_transformed_children, context)
  end

  # Executes the block on the bindings obtained by Pattern#match, if such a match
  # can be made. Depending on the arity of the given block, it is called in
  # one of two environments: the current one or a clean toplevel environment.
  #
  # If you would like the current environment preserved, please use the
  # arity 1 variant of the block. Alternatively, you can inject a context object
  # and call methods on it (think :ctx => self).
  #
  #   # the local variable a is simulated
  #   t.call_on_match(:a => :b) { a }
  #   # no change of environment here
  #   t.call_on_match(:a => :b) { |d| d[:a] }
  #
  # Returns nil when no block is given.
  #
  def call_on_match(bindings, block)
    return unless block
    return block.call(bindings) if block.arity == 1

    Context.new(bindings).instance_eval(&block)
  end

  # Allow easy access to all rules, the ones defined in the instance and the
  # ones predefined in a subclass definition. Class level rules come first.
  #
  def rules
    self.class.rules + @rules
  end

  # Replaces +elt+ by the result of the first rule whose pattern matches
  # it; an element that no rule matches is returned unchanged.
  #
  # @api private
  #
  def transform_elt(elt, context)
    rules.each do |pattern, block|
      bindings = pattern.match(elt, context)

      # Produces transformed value
      return call_on_match(bindings, block) if bindings
    end

    # No rule matched - element is not transformed
    elt
  end

  # Builds a new hash with the same keys and transformed values.
  #
  # @api private
  #
  def recurse_hash(hsh, ctx)
    hsh.each_with_object({}) do |(k, v), new_hsh|
      new_hsh[k] = apply(v, ctx)
    end
  end

  # Builds a new array of transformed elements.
  #
  # @api private
  #
  def recurse_array(ary, ctx)
    ary.map { |elt| apply(elt, ctx) }
  end
end
require 'parslet/context'