parser.rb |
|
---|---|
EBNF Parser for EBNF. Produces an Abstract Syntax Tree in S-Expression form for the input grammar file |
require 'ebnf/rule'
require 'ebnf/terminals'
require 'ebnf/ll1/parser'
require 'meta'
require 'sxp'
require 'logger'
class EBNFLL1Parser
include EBNF::LL1::Parser
include EBNFParserMeta
include EBNF::Terminals |
An internal class used for capturing the values of a production. |
# Holds the name of a production together with the values captured for it.
#
# `to_ary` lets a result be converted into a plain nested array whose
# first element is the production name; `inspect` renders it as a
# parenthesised, space-separated form.
class ProdResult
  attr_accessor :prod, :values

  # @param prod   the production name
  # @param values zero or more values captured for the production
  def initialize(prod, *values)
    @prod = prod
    @values = values
  end

  # @return [Array] the production name followed by each value, with any
  #   value that responds to `to_ary` converted through it
  def to_ary
    converted = values.map do |value|
      value.respond_to?(:to_ary) ? value.to_ary : value
    end
    [@prod, *converted]
  end

  # @return [String] "(prod v1 v2 ...)" using each value's `inspect`
  def inspect
    rendered = values.map(&:inspect).join(' ')
    "(#{prod} #{rendered})"
  end
end
Abstract syntax tree from parse. @return [Array<EBNF::Rule>] |
attr_reader :ast |
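Terminals: Define rules for Terminals, placing results on the input stack, making them available to upstream non-Terminal rules. Terminals are defined with a symbol matching the associated rule name, and a regular expression used by the lexer. The `prod` block parameter is passed to every handler but is not used below. The `token` parameter is the matched lexer token. The `input` parameter is the hash on which the terminal's value is stored. Terminals are matched in the order of appearance |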
|
Match the Left hand side of a rule or terminal
|
# Left-hand side of a rule: extracts the optional bracketed identifier and
# the symbol name preceding `::=`, storing them as input[:id] and
# input[:symbol]. Both are set to nil when the pattern does not match.
terminal(:LHS, LHS) do |_prod, token, input|
  matches = token.value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/)
  input[:id], input[:symbol] = matches.first
end
Match `SYMBOL` terminal
|
# Symbol reference: stores the token value, converted to a Symbol, as
# input[:terminal].
terminal(:SYMBOL, SYMBOL) do |_prod, token, input|
  symbol_name = token.value.to_sym
  input[:terminal] = symbol_name
end
Match `HEX` terminal
|
# Hex character: stores the raw token value tagged with :hex as
# input[:terminal].
terminal(:HEX, HEX) do |_prod, token, input|
  hex_text = token.value
  input[:terminal] = [:hex, hex_text]
end
Terminal for `RANGE`
|
# Character range: drops the first and last characters of the token value
# and stores the remainder tagged with :range as input[:terminal].
terminal(:RANGE, RANGE, unescape: true) do |_prod, token, input|
  range_body = token.value[1..-2]
  input[:terminal] = [:range, range_body]
end
Terminal for `O_RANGE`
|
# O_RANGE token: handled like RANGE. The first and last characters of the
# token value are dropped and the remainder is stored tagged with :range
# as input[:terminal].
terminal(:O_RANGE, O_RANGE, unescape: true) do |_prod, token, input|
  range_body = token.value[1..-2]
  input[:terminal] = [:range, range_body]
end
Strings have internal escape sequences expanded and are passed through without surrounding quotes as terminals |
|
Match double quote string
|
# STRING1 token: stores the token value without its first and last
# characters (the surrounding quotes) as input[:terminal].
terminal(:STRING1, STRING1, unescape: true) do |_prod, token, input|
  unquoted = token.value[1..-2]
  input[:terminal] = unquoted
end
Match single quote string
|
# STRING2 token: stores the token value without its first and last
# characters (the surrounding quotes) as input[:terminal].
terminal(:STRING2, STRING2, unescape: true) do |_prod, token, input|
  unquoted = token.value[1..-2]
  input[:terminal] = unquoted
end
The `POSTFIX` terminal records the postfix operator (`*`, `+` or `?`) that follows a primary. |
|
Match `POSTFIX` terminal
|
# Postfix operator: stores the raw token value as input[:postfix], where
# the :postfix production later reads it.
terminal(:POSTFIX, POSTFIX) do |_prod, token, input|
  operator = token.value
  input[:postfix] = operator
end
The |
|
Make sure we recognize string terminals, even though they're not actually used in processing. This defines a "catch-all" terminal for the lexer. |
# Catch-all terminal for the literal tokens `@terminals`, `@pass` and the
# punctuation characters `[ ] | - ( )`. The matched text is stored as
# input[:terminal].
terminal(nil, %r(@terminals|@pass|[\[\]|\-\(\)])) do |_prod, token, input|
  literal = token.value
  input[:terminal] = literal
end
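Non-terminal productions: Define productions for non-Terminals. This can include `production` handlers, which are invoked at the end of a rule. The `input` parameter is the hash on which a production stores its result for its parent. The `data` parameter is the hash of values collected from the production's terminals and child productions. The `callback` parameter is invoked to report a completed declaration or rule. |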
|
Production for end of `declaration` non-terminal. Look for a `@terminals` or `@pass` declaration.
|
# End of a declaration: data contains a declaration, so invoke the
# callback. A :terminal value is reported as :terminals; otherwise a
# :pass value is reported as :pass. Nothing is reported if neither is set.
production(:declaration) do |_input, data, callback|
  terminal_value = data[:terminal]
  if terminal_value
    callback.call(:terminals, terminal_value)
  elsif data[:pass]
    callback.call(:pass, data[:pass])
  end
end
Production for end of `rule` non-terminal. Create rule from expression value and pass to callback
|
# End of a rule: data contains an expression. Build an EBNF::Rule from the
# symbol, id and expression (converted via `to_ary` when supported) and
# hand it to the callback. Nothing is reported when there is no expression.
production(:rule) do |_input, data, callback|
  raw_expression = data[:expression]
  expr = raw_expression.respond_to?(:to_ary) ? raw_expression.to_ary : raw_expression
  if expr
    rule = EBNF::Rule.new(data[:symbol].to_sym, data[:id], expr)
    callback.call(:rule, rule)
  end
end
Production for end of `expression` non-terminal.
|
# End of an expression: passes the :alt value collected for this
# production up to the parent as input[:expression].
production(:expression) do |input, data, _callback|
  alternatives = data[:alt]
  input[:expression] = alternatives
end
Production for end of `alt` non-terminal.
Note that this also may just pass through from `seq`
|
# End of an alt: when more than one :seq value was collected they are
# wrapped in an :alt ProdResult; a single value is passed through as-is.
production(:alt) do |input, data, _callback|
  sequences = data[:seq]
  input[:alt] = sequences.length > 1 ? ProdResult.new(:alt, *sequences) : sequences.first
end
Production for end of `seq` non-terminal.
Note that this also may just pass through from `diff`
|
# End of a seq: appends to the parent's :seq list (created on first use).
# More than one collected :diff value is wrapped in a :seq ProdResult; a
# single value is appended as-is.
production(:seq) do |input, data, _callback|
  collected = (input[:seq] ||= [])
  diffs = data[:diff]
  collected << (diffs.length > 1 ? ProdResult.new(:seq, *diffs) : diffs.first)
end
|
# End of a diff: appends to the parent's :diff list (created on first
# use). A missing :postfix list is treated as empty. More than one
# collected :postfix value is wrapped in a :diff ProdResult; otherwise the
# first value (nil for an empty list) is appended.
production(:diff) do |input, data, _callback|
  collected = (input[:diff] ||= [])
  operands = (data[:postfix] ||= [])
  collected << (operands.length > 1 ? ProdResult.new(:diff, *operands) : operands.first)
end
Production for end of `postfix` non-terminal.
|
# End of a postfix: pushes the result onto the parent's :postfix list
# (created on first use). A "*", "+" or "?" operator wraps the primary in
# a :star, :plus or :opt ProdResult respectively; with any other value the
# primary is pushed unchanged.
production(:postfix) do |input, data, _callback|
  results = (input[:postfix] ||= [])
  wrapper = case data[:postfix]
            when "*" then :star
            when "+" then :plus
            when "?" then :opt
            end
  primary = data[:primary]
  results << (wrapper ? ProdResult.new(wrapper, primary) : primary)
end
Production for end of `primary` non-terminal. This may either be a terminal, or the result of an `expression`.
|
# End of a primary: passes up the collected :expression when present,
# falling back to the collected :terminal.
production(:primary) do |input, data, _callback|
  value = data[:expression]
  value = data[:terminal] unless value
  input[:primary] = value
end
Production for end of pass non-terminal.
|
# End of a pass: stores the collected expression (converted via `to_ary`
# when supported) as input[:pass].
production(:pass) do |input, data, _callback|
  expr = data[:expression]
  input[:pass] = expr.respond_to?(:to_ary) ? expr.to_ary : expr
end
Parser invocation. On start, yield ourselves if a block is given, otherwise, return this parser instance @param [#read, #to_s] input @param [Hash{Symbol => Object}] options @option options [Integer] :level Trace level. 0(debug), 1(info), 2(warn), 3(error). @return [self] |
# Parses `input` and collects the resulting rules in @ast.
#
# NOTE(review): `block` is accepted but never invoked in this method.
#
# @param input   [#read, #to_s] grammar source
# @param options [Hash{Symbol => Object}] forwarded to `parse`
# @option options [Integer] :level when present, a Logger writing to STDERR
#   is created at this level and passed on as options[:logger]
def initialize(input, **options, &block)
  # Read input, if necessary; the resulting string is what gets parsed.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  # If a :level option was given, log to STDERR as "<severity> <message>".
  if options.key?(:level)
    logger = Logger.new(STDERR)
    logger.level = options.fetch(:level, 2)
    logger.formatter = ->(severity, _datetime, _progname, msg) { "#{severity} #{msg}\n" }
    options[:logger] = logger
  end

  # Set once a :terminals declaration is seen; later rules become terminals.
  parsing_terminals = false
  @ast = []

  parse(
    @input, START.to_sym,
    branch: BRANCH,
    first: FIRST,
    follow: FOLLOW,
    cleanup: CLEANUP,
    whitespace: EBNF::Terminals::PASS,
    reset_on_start: true,
    **options
  ) do |context, *data|
    payload = data.first
    rule =
      case context
      when :terminals
        # After a :terminals declaration, every following rule is a terminal.
        parsing_terminals = true
        EBNF::Rule.new(nil, nil, payload, kind: :terminals)
      when :pass
        # A :pass declaration becomes a rule of kind :pass.
        EBNF::Rule.new(nil, nil, payload, kind: :pass)
      when :rule
        # The payload is already a rule object; only its kind may change.
        payload.kind = :terminal if parsing_terminals
        payload
      end
    @ast << rule if rule
  end

  @ast
end
Output formatted S-Expression of grammar |
# Output formatted S-Expression of grammar.
#
# @return the result of SXP::Generator.string applied to each rule's
#   `for_sxp` form
def to_sxp
  # Load sxp only if it has not been loaded already.
  require 'sxp' unless defined?(SXP)

  # Output rules as a formatted S-Expression.
  sxp_rules = @ast.map { |rule| rule.for_sxp }
  SXP::Generator.string(sxp_rules)
end
end |