vendored parslet, deemed stable enough and better without dependency
This commit is contained in:
50
lib/parslet/atoms/alternative.rb
Normal file
50
lib/parslet/atoms/alternative.rb
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
# Alternative during matching. Contains a list of parslets that is tried each
|
||||
# one in turn. Only fails if all alternatives fail.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('a') | str('b') # matches either 'a' or 'b'
|
||||
#
|
||||
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  # The parslets that are tried, in the order they were given.
  attr_reader :alternatives

  # Builds an Alternative from the given parslets, keeping their order.
  # Chaining parslets with '|' produces an instance of this class:
  #
  #   str('a') | str('b')
  #
  def initialize(*alternatives)
    super()

    @alternatives = alternatives
    @error_msg = "Expected one of #{alternatives.inspect}"
  end

  #---
  # Appending to an existing Alternative yields a single flat Alternative
  # instead of a nested tree of them, so fewer objects are created.
  #+++
  def |(parslet)
    self.class.new(*(@alternatives + [parslet]))
  end

  # Applies each alternative in turn and returns the first successful
  # result. If every alternative fails, an error is reported through the
  # context, carrying the failure value of each alternative as children.
  def try(source, context, consume_all)
    failures = []

    alternatives.each do |candidate|
      outcome = candidate.apply(source, context, consume_all)
      matched, cause = outcome
      return outcome if matched

      # Remember why this alternative failed.
      failures << cause
    end

    # Reaching this point means that no alternative has matched.
    context.err(self, source, @error_msg, failures)
  end

  precedence ALTERNATE

  # Renders the alternatives separated by ' / '.
  def to_s_inner(prec)
    rendered = alternatives.map { |candidate| candidate.to_s(prec) }
    rendered.join(' / ')
  end
end
|
151
lib/parslet/atoms/base.rb
Normal file
151
lib/parslet/atoms/base.rb
Normal file
@ -0,0 +1,151 @@
|
||||
# Base class for all parslets, handles orchestration of calls and implements
|
||||
# a lot of the operator and chaining methods.
|
||||
#
|
||||
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
|
||||
#
|
||||
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence
  include Parslet::Atoms::DSL
  include Parslet::Atoms::CanFlatten

  # Parses the given input and returns the flattened result. On failure the
  # error value produced by the parse is raised (via its #raise method).
  #
  # @param io [String, Source] input for the parse process; anything that
  #   responds to #line_and_column is used as the source directly, everything
  #   else is wrapped in a Parslet::Source
  # @option options [Parslet::ErrorReporter] :reporter error reporter to use,
  #   defaults to Parslet::ErrorReporter::Tree
  # @option options [Boolean] :prefix Should a prefix match be accepted?
  #   (default: false)
  # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result
  #   tree
  #
  def parse(io, options={})
    source =
      if io.respond_to?(:line_and_column)
        io
      else
        Parslet::Source.new(io)
      end

    # Unless a prefix match is acceptable, the parse must eat all input.
    consume_all = !options[:prefix]

    # Optimistic first pass: run without an error reporter, betting on the
    # input being parseable.
    success, value = setup_and_apply(source, nil, consume_all)

    unless success
      # The bet was lost. Parse again from the start, this time collecting
      # error information, so that the failure can be explained to the user.
      # The stack trace of the raised exception will not be meaningful, but
      # the error information should be.
      reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new
      source.pos = 0
      success, value = setup_and_apply(source, reporter, consume_all)

      fail "Assertion failed: success was true when parsing with reporter" \
        if success

      # value is the error produced by the reporter; raising is delegated
      # to it.
      value.raise

      fail "NEVER REACHED"
    end

    # From here on the parse has succeeded. Leftover input is detected while
    # parsing (see #apply), so finding some here indicates a bug.
    if consume_all && source.chars_left > 0
      fail "BUG: New error strategy should not reach this point."
    end

    flatten(value)
  end

  # Creates a fresh parsing context around the given error reporter and
  # applies this atom to the source.
  #
  # @return [<Boolean, Object>] Result of the parse. If the first member is
  #   true, the parse has succeeded.
  def setup_and_apply(source, error_reporter, consume_all)
    apply(source, Parslet::Atoms::Context.new(error_reporter), consume_all)
  end

  # Runs this atom against the source by way of the context's cache. A
  # successful parse leaves the input consumed; a failed one rewinds the
  # source to where it started.
  #
  # @param source [Parslet::Source] source to read input from
  # @param context [Parslet::Atoms::Context] context to use for the parsing
  # @param consume_all [Boolean] true if the current parse must consume
  #   all input by itself.
  def apply(source, context, consume_all=false)
    start_pos = source.pos

    result = context.try_with_cache(self, source, consume_all)
    success, _ = result

    unless success
      # Failure: put the input back where we found it.
      source.pos = start_pos
      return result
    end

    # Success, and either nothing is left over or leftovers are acceptable:
    # keep the input consumed.
    return result unless consume_all && source.chars_left>0

    # The parse was required to consume everything but did not. Grab up to
    # ten characters of the leftover input for the error message, then rewind
    # as in any other error case.
    offending_pos = source.pos
    offending_input = source.consume(10)
    source.pos = start_pos

    context.err_at(
      self,
      source,
      "Don't know what to do with #{offending_input.to_s.inspect}",
      offending_pos
    )
  end

  # Hook for subclasses: implement the actual parsing behaviour here. The
  # base implementation only raises.
  #
  def try(source, context, consume_all)
    raise NotImplementedError, \
      "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
  end

  # Tells the context whether results of this atom may be stored in its
  # cache. True here; subclasses override this to opt out.
  #
  def cached?
    true
  end

  # Class-level macro that defines the #precedence instance method to return
  # the given value. Used for debug printing.
  #
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # Renders this atom as a string, wrapping it in parentheses when the
  # surrounding precedence is lower than this atom's own.
  def to_s(outer_prec=OUTER)
    return to_s_inner(precedence) unless outer_prec < precedence

    "(" + to_s_inner(precedence) + ")"
  end

  def inspect
    to_s(OUTER)
  end

  private

  # Wraps a value into the success form of a parse result, [true, result].
  #
  def succ(result)
    [true, result]
  end
end
|
137
lib/parslet/atoms/can_flatten.rb
Normal file
137
lib/parslet/atoms/can_flatten.rb
Normal file
@ -0,0 +1,137 @@
|
||||
|
||||
module Parslet::Atoms
  # Helper functions for flattening result values into the intermediary tree
  # that consists of Ruby Hashes and Arrays.
  #
  # The main entry point is #flatten, which takes an annotated structure as
  # input and returns the reduced form.
  #
  # NOTE: Since all of these functions are just that, functions without
  # side effects, they are in a module and not in a class. It's hard to draw
  # the line sometimes, but this is beyond.
  #
  module CanFlatten
    # Converts a tagged value coming out of a parslet into the user-facing
    # value. Anything that is not exactly an Array is passed through
    # untouched. Arrays are expected to carry a tag (:sequence, :maybe or
    # :repetition) as their first element.
    #
    # named should be true if the result will be embedded in a Hash by
    # <code>.as(...)</code>; it changes how :maybe and :repetition results
    # are folded.
    #
    def flatten(value, named=false)
      return value unless value.instance_of? Array

      tag = value.first

      # Flatten every child before combining them.
      children = value.drop(1).map { |element| flatten(element) }

      case tag
      when :sequence
        flatten_sequence(children)
      when :maybe
        named ? children.first : (children.first || '')
      when :repetition
        flatten_repetition(children, named)
      else
        fail "BUG: Unknown tag #{tag.inspect}."
      end
    end

    # Lisp style fold left: the first element seeds the fold over the
    # remaining ones. An empty list folds to the empty string.
    #
    def foldl(list, &block)
      return '' if list.empty?

      head, *rest = list
      rest.inject(head, &block)
    end

    # Flattens the results of a sequence of parslets by dropping nils and
    # merging the remaining elements pairwise.
    #
    # @api private
    #
    def flatten_sequence(list)
      foldl(list.compact) { |merged, element| merge_fold(merged, element) }
    end

    # Merges two flattened sequence elements into one.
    #
    # @api private
    def merge_fold(l, r)
      # Same class on both sides: hashes are merged, everything else is
      # concatenated with +.
      if l.class == r.class
        return l + r unless l.is_a?(Hash)

        warn_about_duplicate_keys(l, r)
        return l.merge(r)
      end

      # Different classes from here on.
      l_stringlike = l.respond_to?(:to_str)
      r_stringlike = r.respond_to?(:to_str)

      if l_stringlike && r_stringlike
        # Two stringlike values of different classes: the one that is a
        # slice wins.
        return r if r.respond_to? :to_slice
        return l if l.respond_to? :to_slice

        fail "NOTREACHED: What other stringlike classes are there?"
      end

      # Exactly one side is stringlike: the other side is more important.
      return l if r_stringlike
      return r if l_stringlike

      # Otherwise hoist the hash into an array next to the other value.
      return l + [r] if r.class == Hash
      return [l] + r if l.class == Hash

      fail "Unhandled case when foldr'ing sequence."
    end

    # Flattens the results of repeating a single parslet. named tells
    # whether the user has named the result; if so, an empty list is
    # returned as an empty list instead of an empty string.
    #
    # @api private
    #
    def flatten_repetition(list, named)
      # Keyed subtrees present: keep only those, dropping everything in
      # between. To keep the rest as well, it has to be named.
      hashes = list.select { |element| element.instance_of?(Hash) }
      return hashes unless hashes.empty?

      # Nested arrays present: keep only those, flattened by one level.
      arrays = list.select { |element| element.instance_of?(Array) }
      return arrays.flatten(1) unless arrays.empty?

      # Consistent handling of empty lists for named results.
      return [] if named && list.empty?

      # Only stringlike things remain; concatenate them.
      foldl(list) { |joined, element| joined + element }
    end

    # Emits the 'Duplicate subtrees while merging result' warning when both
    # hashes share keys. Adding more '.as(...)' names to the intermediary
    # tree avoids it.
    #
    def warn_about_duplicate_keys(h1, h2)
      shared = h1.keys & h2.keys
      return if shared.empty?

      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
        " of the latter will be kept. (keys: #{shared.inspect})"
    end
  end
end
|
38
lib/parslet/atoms/capture.rb
Normal file
38
lib/parslet/atoms/capture.rb
Normal file
@ -0,0 +1,38 @@
|
||||
|
||||
# Stores the result of matching an atom against input in the #captures in
|
||||
# parse context. Doing so will allow you to pull parts of the ongoing parse
|
||||
# out later and use them to match other pieces of input.
|
||||
#
|
||||
# Example:
|
||||
# # After this, context.captures[:an_a] returns 'a'
|
||||
# str('a').capture(:an_a)
|
||||
#
|
||||
# # Capture and use of the capture: (matches either 'aa' or 'bb')
|
||||
# match['ab'].capture(:first) >>
|
||||
# dynamic { |src, ctx| str(ctx.captures[:first]) }
|
||||
#
|
||||
class Parslet::Atoms::Capture < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet the atom whose result should be captured
  # @param name key under which the result is stored (converted with #to_sym
  #   when storing)
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet and, if it succeeds, stores its flattened
  # value in the context's captures. The wrapped parslet's result is
  # returned unchanged either way.
  def apply(source, context, consume_all)
    result = parslet.apply(source, context, consume_all)
    success, value = result

    context.captures[name.to_sym] = flatten(value) if success

    result
  end

  def to_s_inner(prec)
    "(#{name.inspect} = #{parslet.to_s(prec)})"
  end
end
|
||||
|
91
lib/parslet/atoms/context.rb
Normal file
91
lib/parslet/atoms/context.rb
Normal file
@ -0,0 +1,91 @@
|
||||
module Parslet::Atoms
  # Per-parse state: a transient cache that maps (position, parslet) to
  # parse results, used for packrat-style memoization.
  #
  # The error reporter also lives here and all error reporting goes through
  # this class, which makes the reporting pluggable.
  #
  class Context
    # The captures made so far on the input (see
    # Parslet::Atoms::Base#capture). Use as follows:
    #
    #   context.captures[:foobar] # => returns capture :foobar
    #
    attr_reader :captures

    # @param reporter [#err, #err_at] Error reporter. When nil is passed
    #   explicitly, #err and #err_at report failures without a cause.
    def initialize(reporter=Parslet::ErrorReporter::Tree.new)
      # Two-level cache: position => { parslet => [result, advance] }
      @cache = Hash.new { |h, k| h[k] = {} }
      @reporter = reporter
      @captures = Parslet::Scope.new
    end

    # Runs obj.try at the current source position, going through the cache.
    #
    # Besides the result, the cache records how far the source position
    # advanced, so that replaying a cached result can move the source
    # position by the same amount.
    #
    def try_with_cache(obj, source, consume_all)
      beg = source.pos

      cached = lookup(obj, beg)
      if cached
        # Cache hit: this input has been looked at before and its contents
        # are not needed again, so just skip over it.
        result, advance = cached
        source.pos = beg + advance
        return result
      end

      # Cache miss: do the actual work and remember the outcome if the atom
      # allows it.
      result = obj.try(source, self, consume_all)
      set(obj, beg, [result, source.pos-beg]) if obj.cached?

      result
    end

    # Report an error at a given position.
    # @see ErrorReporter
    #
    def err_at(*args)
      cause = @reporter ? @reporter.err_at(*args) : nil
      [false, cause]
    end

    # Report an error.
    # @see ErrorReporter
    #
    def err(*args)
      cause = @reporter ? @reporter.err(*args) : nil
      [false, cause]
    end

    # Runs the given block inside a new capture scope; the scope is popped
    # again when the block finishes, whether normally or not. Use the #scope
    # method of Parslet::Atoms::DSL to call this.
    #
    def scope
      captures.push
      yield
    ensure
      captures.pop
    end

    private

    # Fetches the cache entry for obj at pos (nil if there is none).
    def lookup(obj, pos)
      @cache[pos][obj]
    end

    # Stores val as the cache entry for obj at pos.
    def set(obj, pos, val)
      @cache[pos][obj] = val
    end
  end
end
|
109
lib/parslet/atoms/dsl.rb
Normal file
109
lib/parslet/atoms/dsl.rb
Normal file
@ -0,0 +1,109 @@
|
||||
|
||||
# A mixin module that defines operations that can be called on any subclass
|
||||
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
|
||||
# allow combination of parslet atoms to form bigger parsers.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('foo') >> str('bar')
|
||||
# str('f').repeat
|
||||
# any.absent? # also called The Epsilon
|
||||
#
|
||||
module Parslet::Atoms::DSL
  # Wraps this atom in a Repetition with the given bounds: at least min and
  # at most max matches. Passing nil for max means there is no upper bound.
  #
  # Example:
  #   # match any number of 'a's
  #   str('a').repeat
  #
  #   # match between 1 and 3 'a's
  #   str('a').repeat(1,3)
  #
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Makes this atom optional: a Repetition with bounds 0 and 1 that is
  # tagged :maybe instead of :repetition.
  #
  # Example:
  #   str('foo').maybe
  #
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Builds a Sequence of this atom followed by the given one.
  #
  # Example:
  #   str('a') >> str('b')
  #
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Builds an Alternative of this atom and the given one. This atom, on the
  # left side, comes first in the list of alternatives.
  #
  # Example:
  #   # matches either 'a' OR 'b'
  #   str('a') | str('b')
  #
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Builds a negative Lookahead: tests that this atom is absent from the
  # input without consuming anything.
  #
  # Example:
  #   # Only proceed the parse if 'a' is absent.
  #   str('a').absent?
  #
  def absent?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Builds a positive Lookahead: tests that this atom is present in the
  # input without consuming anything.
  #
  # Example:
  #   # Only proceed the parse if 'a' is present.
  #   str('a').present?
  #
  def present?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Deprecated alias for present? that will disappear in 2.0.
  #
  alias_method :prsnt?, :present?

  # Deprecated alias for absent? that will disappear in 2.0.
  #
  alias_method :absnt?, :absent?

  # Names the result of this atom so that it shows up in the tree output.
  #
  # Example:
  #   str('a').as(:b) # will produce {:b => 'a'}
  #
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Wraps this atom in a Capture, which stores the (possibly complex) result
  # of a successful parse under the given name.
  #
  # Example:
  #   str('a').capture(:b)  # will store captures[:b] == 'a'
  #
  def capture(name)
    Parslet::Atoms::Capture.new(self, name)
  end
end
|
32
lib/parslet/atoms/dynamic.rb
Normal file
32
lib/parslet/atoms/dynamic.rb
Normal file
@ -0,0 +1,32 @@
|
||||
# Evaluates a block at parse time. The result from the block must be a parser
|
||||
# (something which implements #apply). The parser returned will then
|
||||
# be applied to the input, creating the result.
|
||||
#
|
||||
# Dynamic parses are never cached.
|
||||
#
|
||||
# Example:
|
||||
# dynamic { rand < 0.5 ? str('a') : str('b') }
|
||||
#
|
||||
class Parslet::Atoms::Dynamic < Parslet::Atoms::Base
  # The callable evaluated at parse time.
  attr_reader :block

  # @param block [#call] callable that receives (source, context) and
  #   returns a parslet atom
  def initialize(block)
    # Run the superclass initializer, like every other atom does.
    super()

    @block = block
  end

  # Results of dynamic atoms are never stored in the context's cache.
  def cached?
    false
  end

  # Calls the block with the source and context, then applies the atom it
  # returns to the input.
  def try(source, context, consume_all)
    atom = block.call(source, context)

    # The block's return value is a parslet atom.
    atom.apply(source, context, consume_all)
  end

  def to_s_inner(prec)
    "dynamic { ... }"
  end
end
|
||||
|
41
lib/parslet/atoms/entity.rb
Normal file
41
lib/parslet/atoms/entity.rb
Normal file
@ -0,0 +1,41 @@
|
||||
# This wraps pieces of parslet definition and gives them a name. The wrapped
|
||||
# piece is lazily evaluated and cached. This has two purposes:
|
||||
#
|
||||
# * Avoid infinite recursion during evaluation of the definition
|
||||
# * Be able to print things by their name, not by their sometimes
|
||||
# complicated content.
|
||||
#
|
||||
# You don't normally use this directly, instead you should generate it by
|
||||
# using the structuring method Parslet.rule.
|
||||
#
|
||||
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :block

  # @param name name used when printing this entity
  # @param block block that produces the wrapped parslet when called
  def initialize(name, &block)
    super()

    @name = name
    @block = block
  end

  # Delegates to the wrapped parslet.
  def try(source, context, consume_all)
    parslet.apply(source, context, consume_all)
  end

  # Returns the wrapped parslet. The block is evaluated on first use and its
  # result is memoized; a block that returns nothing raises
  # NotImplementedError.
  def parslet
    @parslet ||= @block.call.tap do |produced|
      raise_not_implemented unless produced
    end
  end

  # Prints the entity by its upcased name rather than by its content.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  private

  # Raises a NotImplementedError whose backtrace has the frames from this
  # file removed (approach borrowed from dependencies.rb in activesupport).
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    trace = caller.reject { |line| line =~ this_file }

    exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    exception.set_backtrace(trace)

    raise exception
  end
end
|
121
lib/parslet/atoms/infix.rb
Normal file
121
lib/parslet/atoms/infix.rb
Normal file
@ -0,0 +1,121 @@
|
||||
# Parses infix expressions built from one element atom and a list of
# operations, each given as [operator atom, precedence, associativity].
class Parslet::Atoms::Infix < Parslet::Atoms::Base
  attr_reader :element, :operations

  # @param element atom that matches a single operand
  # @param operations [Array<Array>] entries of the form
  #   [operator atom, precedence, associativity]
  def initialize(element, operations)
    super()

    @element = element
    @operations = operations
  end

  # Parses an infix expression and returns it as a success holding the tree
  # built by #produce_tree. If the climb aborts, the error thrown by #abort
  # is returned instead.
  def try(source, context, consume_all)
    catch_error {
      succ(
        produce_tree(
          precedence_climb(source, context, consume_all)))
    }
  end

  # Turns an array of the form ['1', '+', ['2', '*', '3']] into a hash that
  # reflects the same structure, using the keys :l, :o and :r. Anything that
  # is not an Array is returned as is. The array passed in is not modified.
  #
  def produce_tree(ary)
    return ary unless ary.kind_of? Array

    left, *rest = ary

    # What remains after the first operand are (operator, operand) pairs. A
    # right operand that is itself an array is a subexpression and becomes a
    # subhash.
    rest.each_slice(2) do |op, right|
      left = {l: left, o: op, r: produce_tree(right)}
    end

    left
  end

  # A precedence climbing algorithm married to parslet, as described here
  # http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing/
  #
  # @note Error handling in this routine is done by throwing :error and
  #   as a value the error to return to parslet. This avoids cluttering
  #   the recursion logic here with parslet error handling.
  #
  # @param current_prec minimum precedence an operator needs in order to be
  #   consumed at this level
  # @param needs_element not read by this method; kept so that the signature
  #   stays the same
  #
  def precedence_climb(source, context, consume_all, current_prec=1, needs_element=false)
    # An expression has to start with at least one @element.
    success, value = @element.apply(source, context, false)

    unless success
      abort context.err(self, source, "#{@element.inspect} was expected", [value])
    end

    result = [flatten(value, true)]

    # Keep consuming operator/operand pairs for as long as operators match.
    loop do
      op_pos = source.pos
      op_match, prec, assoc = match_operation(source, context, false)

      # No operator here: end of file, end of expression or a syntax error.
      # Either way this expression ends.
      break unless op_match

      if prec < current_prec
        # The operator binds less tightly than what this level handles.
        # Hand it back to the caller by rewinding to before it.
        source.pos = op_pos
        break
      end

      next_prec = (assoc == :left) ? prec+1 : prec

      result << op_match
      result << precedence_climb(
        source, context, consume_all, next_prec, true)
    end

    unwrap(result)
  end

  # Returns the only element of a one-element list, the list itself
  # otherwise.
  def unwrap expr
    expr.size == 1 ? expr.first : expr
  end

  # Tries the operator atoms in order. For the first one that matches,
  # returns its flattened value together with its precedence and
  # associativity; returns nil if none matches.
  def match_operation(source, context, consume_all)
    @operations.each do |op_atom, prec, assoc|
      success, value = op_atom.apply(source, context, consume_all)
      return flatten(value, true), prec, assoc if success
    end

    nil
  end

  # Leaves the precedence climb by throwing :error with the given error.
  def abort(error)
    throw :error, error
  end

  # Runs the block and returns its value, or the value thrown by #abort.
  def catch_error
    catch(:error) { yield }
  end

  def to_s_inner(prec)
    ops = @operations.map { |o, _, _| o.inspect }.join(', ')
    "infix_expression(#{@element.inspect}, [#{ops}])"
  end
end
|
49
lib/parslet/atoms/lookahead.rb
Normal file
49
lib/parslet/atoms/lookahead.rb
Normal file
@ -0,0 +1,49 @@
|
||||
# Either positive or negative lookahead, doesn't consume its input.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('foo').present? # matches when the input contains 'foo', but leaves it
|
||||
#
|
||||
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # @param bound_parslet the atom to look ahead for
  # @param positive true for positive lookahead, false for negative
  def initialize(bound_parslet, positive=true)
    super()

    # A single flag distinguishes positive from negative lookahead.
    @positive = positive
    @bound_parslet = bound_parslet

    @error_msgs = {
      :positive => ["Input should start with ", bound_parslet],
      :negative => ["Input should not start with ", bound_parslet]
    }
  end

  # Applies the bound parslet and succeeds (with a nil value) when the
  # outcome agrees with the kind of lookahead. The source position is put
  # back in every case: lookaheads never consume their input, not even on
  # success.
  def try(source, context, consume_all)
    start = source.pos

    success, _ = bound_parslet.apply(source, context, consume_all)

    expectation_met = positive ? success : !success
    return succ(nil) if expectation_met

    failure_kind = positive ? :positive : :negative
    context.err_at(self, source, @error_msgs[failure_kind], start)
  ensure
    source.pos = start
  end

  precedence LOOKAHEAD

  # Prefixes the bound parslet with '&' (positive) or '!' (negative).
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end
end
|
32
lib/parslet/atoms/named.rb
Normal file
32
lib/parslet/atoms/named.rb
Normal file
@ -0,0 +1,32 @@
|
||||
# Names a match to influence tree construction.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('foo') # will return 'foo',
|
||||
# str('foo').as(:foo) # will return :foo => 'foo'
|
||||
#
|
||||
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # @param parslet the atom whose result gets named
  # @param name key under which the result appears in the output
  def initialize(parslet, name)
    super()

    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet. Failures are passed through unchanged; a
  # success is rewrapped as a one-entry hash keyed by the name.
  def apply(source, context, consume_all)
    result = parslet.apply(source, context, consume_all)
    success, value = result

    success ? succ(produce_return_value(value)) : result
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  private

  # Builds { name => flattened value }, flattening in named mode.
  def produce_return_value(val)
    { name => flatten(val, true) }
  end
end
|
38
lib/parslet/atoms/re.rb
Normal file
38
lib/parslet/atoms/re.rb
Normal file
@ -0,0 +1,38 @@
|
||||
# Matches a special kind of regular expression that only ever matches one
|
||||
# character at a time. Useful members of this family are: <code>character
|
||||
# ranges, \\w, \\d, \\r, \\n, ...</code>
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# match('[a-z]') # matches a-z
|
||||
# match('\s') # like regexps: matches space characters
|
||||
#
|
||||
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match, :re

  # @param match pattern source; its string form is compiled into a
  #   multiline Regexp
  def initialize(match)
    super()

    @match = match.to_s
    @re = Regexp.new(self.match, Regexp::MULTILINE)

    # The :failed message inspects the argument as it was passed in.
    failed_msg = "Failed to match #{match.inspect[1..-2]}"
    @error_msgs = {
      :premature => "Premature end of input",
      :failed => failed_msg
    }
  end

  # Consumes one character if the source matches the regexp at the current
  # position. Otherwise reports either a premature end of input (nothing
  # left to read) or a failed match.
  def try(source, context, consume_all)
    return succ(source.consume(1)) if source.matches?(@re)

    reason = source.chars_left < 1 ? :premature : :failed
    context.err(self, source, @error_msgs[reason])
  end

  # Prints the pattern string without the quotes that #inspect adds.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
|
||||
|
83
lib/parslet/atoms/repetition.rb
Normal file
83
lib/parslet/atoms/repetition.rb
Normal file
@ -0,0 +1,83 @@
|
||||
|
||||
# Matches a parslet repeatedly.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('a').repeat(1,3) # matches 'a' at least once, but at most three times
|
||||
# str('a').maybe # matches 'a' if it is present in the input (repeat(0,1))
|
||||
#
|
||||
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # @param parslet the atom to repeat
  # @param min minimum number of matches required
  # @param max maximum number of matches, or nil for no upper bound
  # @param tag tag placed at the head of the result array (used when the
  #   result is flattened)
  # @raise [ArgumentError] if max is 0
  def initialize(parslet, min, max, tag=:repetition)
    super()

    if max == 0
      raise ArgumentError,
        "Asking for zero repetitions of a parslet. (#{parslet.inspect} repeating #{min},#{max})"
    end

    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
    @error_msgs = {
      :minrep => "Expected at least #{min} of #{parslet.inspect}",
      :unconsumed => "Extra input after last repetition"
    }
  end

  # Applies the parslet over and over, collecting the values behind the tag.
  # Stops successfully as soon as max matches are reached (if max is set),
  # otherwise at the first failure.
  def try(source, context, consume_all)
    count = 0
    collected = [@tag] # the tag comes first, the matched values follow
    start_pos = source.pos
    last_failure = nil

    loop do
      success, value = parslet.apply(source, context, false)

      unless success
        # Keep the reason for the failure; it is reported below.
        last_failure = value
        break
      end

      count += 1
      collected << value

      # With an upper bound, stop as soon as it is reached.
      return succ(collected) if max && count>=max
    end

    # The loop was left because the parslet failed to match. That is an
    # error if it matched fewer than min times before.
    if count < min
      return context.err_at(
        self,
        source,
        @error_msgs[:minrep],
        start_pos,
        [last_failure])
    end

    # With consume_all set, this repetition is expected to consume the input
    # completely. If input is left over, report that here, so that the
    # reason why the last repetition failed is not thrown away - it often
    # contains very interesting information.
    if consume_all && source.chars_left>0
      return context.err(
        self,
        source,
        @error_msgs[:unconsumed],
        [last_failure])
    end

    succ(collected)
  end

  precedence REPETITION

  # Appends '?' for the 0..1 case and '{min, max}' otherwise.
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end
end
|
||||
|
26
lib/parslet/atoms/scope.rb
Normal file
26
lib/parslet/atoms/scope.rb
Normal file
@ -0,0 +1,26 @@
|
||||
# Starts a new scope in the parsing process. Please also see the #captures
|
||||
# method.
|
||||
#
|
||||
class Parslet::Atoms::Scope < Parslet::Atoms::Base
  attr_reader :block

  # +block+ is a callable that returns the parslet to run inside the scope;
  # it is invoked again on every #apply and every #to_s_inner.
  def initialize(block)
    super()

    @block = block
  end

  # Always answers false for this atom.
  def cached?
    false
  end

  # Calls the stored block inside context.scope and applies the parslet it
  # returns, handing that parslet's result straight back to the caller.
  def apply(source, context, consume_all)
    context.scope {
      inner = block.call
      # Explicit return: leaves #apply directly with the inner result.
      return inner.apply(source, context, consume_all)
    }
  end

  # Textual form: the inner parslet's rendering wrapped in "scope { ... }".
  def to_s_inner(prec)
    inner = block.call
    "scope { #{inner.to_s(prec)} }"
  end
end
|
45
lib/parslet/atoms/sequence.rb
Normal file
45
lib/parslet/atoms/sequence.rb
Normal file
@ -0,0 +1,45 @@
|
||||
# A sequence of parslets, matched from left to right. Denoted by '>>'
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('a') >> str('b') # matches 'a', then 'b'
|
||||
#
|
||||
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  attr_reader :parslets

  # Stores the given parslets in the order they must match.
  def initialize(*parslets)
    super()

    @parslets = parslets
    @error_msgs = { :failed => "Failed to match sequence (#{inspect})" }
  end

  # Appending to an existing sequence yields one flat Sequence holding all
  # parslets instead of nesting a sequence inside a sequence.
  def >>(parslet)
    self.class.new(*(@parslets + [parslet]))
  end

  # Applies each parslet in turn. The first failure aborts the sequence and
  # is reported with the child's failure value attached. On success the
  # result is an array of :sequence followed by each child's value.
  #
  # Only the final parslet inherits +consume_all+; every earlier one is
  # applied with it switched off.
  def try(source, context, consume_all)
    total = parslets.size

    # Pre-sized result: slot 0 holds the tag, slots 1..total the values.
    result = Array.new(total + 1)
    result[0] = :sequence

    parslets.each.with_index(1) do |atom, slot|
      must_finish = consume_all && (slot == total)
      matched, outcome = atom.apply(source, context, must_finish)

      return context.err(self, source, @error_msgs[:failed], [outcome]) unless matched

      result[slot] = outcome
    end

    succ(result)
  end

  precedence SEQUENCE

  # Renders the member parslets separated by single spaces.
  def to_s_inner(prec)
    parslets.map { |atom| atom.to_s(prec) }.join(' ')
  end
end
|
39
lib/parslet/atoms/str.rb
Normal file
39
lib/parslet/atoms/str.rb
Normal file
@ -0,0 +1,39 @@
|
||||
# Matches a string of characters.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# str('foo') # matches 'foo'
|
||||
#
|
||||
class Parslet::Atoms::Str < Parslet::Atoms::Base
  attr_reader :str

  # Builds a matcher for the literal text of +str+.
  #
  # The argument is coerced with #to_s once, and the pattern, the length and
  # the failure message are all derived from that coerced string. Deriving
  # them from the raw argument would make the coercion pointless for
  # non-String input: Regexp.escape rejects an Integer, and Integer#size is
  # a byte width rather than a character count.
  def initialize(str)
    super()

    @str = str.to_s
    @pat = Regexp.new(Regexp.escape(@str))
    @len = @str.size
    @error_msgs = {
      :premature => "Premature end of input",
      :failed    => "Expected #{@str.inspect}, but got "
    }
  end

  # Succeeds with the consumed text when the source matches the literal at
  # its current position. Otherwise reports one of two errors:
  # * "Premature end of input" when fewer than @len characters are left;
  # * "Expected ..., but got <text>" otherwise, positioned where the
  #   comparison started and including the @len characters actually found.
  def try(source, context, consume_all)
    return succ(source.consume(@len)) if source.matches?(@pat)

    # Input ending early:
    if source.chars_left < @len
      return context.err(self, source, @error_msgs[:premature])
    end

    # Expected something, but got something else instead. Capture the
    # position first, because consuming the offending text moves the source.
    error_pos = source.pos
    context.err_at(
      self, source,
      [@error_msgs[:failed], source.consume(@len)], error_pos)
  end

  # Renders the literal wrapped in single quotes.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
|
||||
|
89
lib/parslet/atoms/visitor.rb
Normal file
89
lib/parslet/atoms/visitor.rb
Normal file
@ -0,0 +1,89 @@
|
||||
# Augments all parslet atoms with an accept method that will call back
|
||||
# to the visitor given.
|
||||
|
||||
#
|
||||
module Parslet::Atoms
  class Base
    # Fallback for atoms that do not take part in the visitor protocol:
    # always raises NotImplementedError naming the receiving class.
    def accept(visitor)
      raise(NotImplementedError, "No #accept method on #{self.class.name}.")
    end
  end

  class Str
    # Calls the visitor's #visit_str with the literal string. See
    # parslet/export for an example.
    def accept(visitor)
      visitor.visit_str(str)
    end
  end

  class Entity
    # Calls the visitor's #visit_entity with the entity name and its block.
    # See parslet/export for an example.
    def accept(visitor)
      visitor.visit_entity(name, block)
    end
  end

  class Named
    # Calls the visitor's #visit_named with the name and the wrapped parslet.
    # See parslet/export for an example.
    def accept(visitor)
      visitor.visit_named(name, parslet)
    end
  end

  class Sequence
    # Calls the visitor's #visit_sequence with the list of member parslets.
    # See parslet/export for an example.
    def accept(visitor)
      visitor.visit_sequence(parslets)
    end
  end

  class Repetition
    # Calls the visitor's #visit_repetition with the tag, both bounds and
    # the repeated parslet. See parslet/export for an example.
    def accept(visitor)
      visitor.visit_repetition(@tag, min, max, parslet)
    end
  end

  class Alternative
    # Calls the visitor's #visit_alternative with the list of alternatives.
    # See parslet/export for an example.
    def accept(visitor)
      visitor.visit_alternative(alternatives)
    end
  end

  class Lookahead
    # Calls the visitor's #visit_lookahead with the positive flag and the
    # bound parslet. See parslet/export for an example.
    def accept(visitor)
      visitor.visit_lookahead(positive, bound_parslet)
    end
  end

  class Re
    # Calls the visitor's #visit_re with the match expression. See
    # parslet/export for an example.
    def accept(visitor)
      visitor.visit_re(match)
    end
  end
end
|
||||
|
||||
class Parslet::Parser
  # Visitor entry point for a whole parser: passes the parser's root to the
  # visitor's #visit_parser and returns whatever that call returns.
  def accept(visitor)
    visitor.visit_parser(root)
  end
end
|
Reference in New Issue
Block a user