parser.rb |
|
---|---|
EBNF Parser for ISO EBNF. Produces an Abstract Syntax Tree in S-Expression form for the input grammar file. |
require 'ebnf'
require 'ebnf/terminals'
require 'ebnf/peg/parser'
require 'meta'
require 'sxp'
require 'logger'
class ISOEBNFPegParser
# Mix in the PEG parser module. The `terminal`, `production`,
# `start_production` and `parse` calls in this class are not defined here and
# presumably come from this module.
include EBNF::PEG::Parser
The base for terminal-character, which omits "'", '"', and '?'. Could be more optimized, and one might quibble with the overly strictly defined character set, but it is correct. |
# Alternation of every terminal character EXCEPT the first quote symbol ('),
# the second quote symbol (") and the special sequence symbol (?). The derived
# constants below add back whichever of those three is legal in each context.
#
# The literal is written in extended (x) mode, so the whitespace and the
# `# ...` remarks inside it are ignored by the regexp engine.
# NOTE(review): the two `# DIVIDER` placeholders presumably mark where the
# omitted quote / special-sequence alternatives used to be commented out;
# confirm against the upstream source.
TERMINAL_CHARACTER_BASE = %r{
[a-zA-Z0-9] | # letter | decimal digit
, | # concatenate symbol
= | # defining symbol
[\|\/!] | # definition separator symbol
\*\) | # end comment symbol
\) | # end group symbol
\] | # end option symbol
\} | # end repeat symbol
\- | # except symbol
# DIVIDER
\* | # repetition symbol
# DIVIDER
\(\* | # start comment symbol
\( | # start group symbol
\[ | # start option symbol
\{ | # start repeat symbol
[;\.] | # terminator symbol
[:+_%@&$<>^\x20\x23\\`~] # other character
}x

# Any terminal character: the base set plus both quote symbols and `?`.
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}

# Characters allowed inside a '...' terminal string: the base set plus `"` and
# `?`, but not the enclosing single quote.
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}

# Characters allowed inside a "..." terminal string: the base set plus `'` and
# `?`, but not the enclosing double quote.
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}

# Characters allowed inside a ?...? special sequence: the base set plus both
# quote symbols, but not the enclosing `?`.
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
\' | # first quote symbol |
# Abstract syntax tree built while parsing.
#
# @return [Array] the rules appended to @ast by #initialize
attr_reader :ast
\" | # second quote symbol \? | # special sequence symbol |
# Terminal for a run of one or more decimal digits. The handler returns the
# matched text converted with String#to_i; the second block argument is not
# used.
terminal(:integer, /\d+/) do |value, _prod|
  value.to_i
end
Abstract syntax tree from parse. @return [Array<EBNF::Rule>] |
# Terminal for a meta identifier: an ASCII letter followed by any number of
# ASCII letters, digits or underscores. The handler returns the matched text
# as a Symbol.
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
  value.to_sym
end
|
# Terminal for a quoted string: either '...' built from
# FIRST_TERMINAL_CHARACTER or "..." built from SECOND_TERMINAL_CHARACTER, with
# at least one character between the quotes. The handler drops the first and
# last character of the match, i.e. the enclosing quotes.
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
  value[1..-2]
end
|
# Special sequence: one or more SPECIAL_SEQUENCE_CHARACTERs between two `?`.
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)

# A single terminal character (see TERMINAL_CHARACTER).
terminal(:terminal_character, TERMINAL_CHARACTER)

# Terminal whose pattern is the empty regexp.
terminal(:empty, //)

# Definition separator: `|`, `/` or `!`.
terminal(:definition_separator_symbol, /[\|\/!]/)

# Rule terminator: `;` or `.`.
terminal(:terminator_symbol, /[;\.]/)

# Start of an optional sequence: `[` or the alternative spelling `(/`.
terminal(:start_option_symbol, /\[|(?:\(\/)/)

# End of an optional sequence: `]`.
terminal(:end_option_symbol, /\]/)

# Start of a repeated sequence: `{` or the alternative spelling `(:`.
terminal(:start_repeat_symbol, /{|\(:/)
`[28] start_option_symbol ::= '[' \| '(/'` |
# End of a repeated sequence: `}` or the alternative spelling `:)`.
terminal(:end_repeat_symbol, /}|:\)/)
|
|
|
# Handler for a completed syntax rule. Builds an EBNF::Rule from the rule's
# meta identifier and its definitions list, reports it through the parser
# callback as a :rule event, and returns nil as the production's own value.
production(:syntax_rule, clear_packrat: true) do |value, _data, callback|
  # value[0] carries the meta identifier and value[2] the definitions list.
  # NOTE(review): value[1] is presumably the defining symbol; confirm against
  # the grammar in ISOEBNFMeta.
  sym = value[0][:meta_identifier]
  definitions_list = value[2][:definitions_list]
  callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
  nil
end
Non-terminal productions |
# A definitions list is one single definition optionally followed by further
# alternatives. With alternatives present the result is an [:alt, ...]
# expression; otherwise the lone single definition is passed through as is.
start_production(:definitions_list, as_hash: true)
production(:definitions_list) do |value|
  alternatives = value[:_definitions_list_1]
  leading = value[:single_definition]
  alternatives.empty? ? leading : [:alt, leading] + alternatives
end
# Coerces the first element of the matched value to an Array, so that a
# missing (nil) entry becomes an empty list.
production(:_definitions_list_1) do |value|
  head = value.first
  Array(head)
end
# Flattens a nested definitions list into a plain list of alternatives.
# If the nested list is itself an [:alt, a, b, ...] expression, its operands
# are returned without the :alt head; anything else is wrapped in a
# one-element Array.
start_production(:_definitions_list_2, as_hash: true)
production(:_definitions_list_2) do |value|
  nested = value[:definitions_list]
  # Require an actual Array before looking at its head: a bare Symbol :alt
  # (a meta identifier that happens to be named "alt") must be treated as a
  # single alternative, not sliced with Symbol#[] into the String "lt".
  if nested.is_a?(Array) && nested.first == :alt
    nested[1..-1]
  else
    [nested]
  end
end
|
# A single definition is one term optionally followed by further terms.
# With further terms present the result is a [:seq, ...] expression;
# otherwise the lone term is passed through unchanged.
start_production(:single_definition, as_hash: true)
production(:single_definition) do |value|
  following = value[:_single_definition_1]
  leading = value[:term]
  following.empty? ? leading : [:seq, leading] + following
end
# Collects the :term of the last element of every repetition and drops the
# nil entries.
# NOTE(review): the element before each term is presumably the concatenate
# symbol (','), not '|' as the original inline comment said; confirm against
# the grammar.
production(:_single_definition_1) do |value|
  value.map { |a1| a1.last[:term] }.compact
end
value contains an expression. Invoke callback |
# A term is a factor with an optional exception. When an exception is present
# the result is [:diff, factor, exception]; otherwise just the factor.
start_production(:term, as_hash: true)
production(:term) do |value|
  excluded = value[:_term_1]
  factor = value[:factor]
  excluded ? [:diff, factor, excluded] : factor
end
# Returns the :exception entry of the last matched element, or nil when the
# optional part did not match (value is nil/false).
production(:_term_1) do |value|
  value.last[:exception] if value
end
Setting
|
# An exception evaluates to its :factor entry.
start_production(:exception, as_hash: true)
production(:exception) do |value|
  value[:factor]
end
|
# A factor is a primary with an optional repetition count. With a count the
# result is [:rept, count, count, primary] (the count is used for both
# bounds); otherwise the primary alone.
start_production(:factor, as_hash: true)
production(:factor) do |value|
  count = value[:_factor_1]
  primary = value[:primary]
  count ? [:rept, count, count, primary] : primary
end
# Returns the :integer entry of the first matched element (the repetition
# count).
production(:_factor_2) do |value|
  value.first[:integer]
end
|
# Wraps the definitions list found in the second matched element as an
# optional expression: [:opt, definitions_list].
production(:optional_sequence) do |value|
  [:opt, value[1][:definitions_list]]
end
|
# Wraps the definitions list found in the second matched element as a
# repetition expression: [:star, definitions_list].
production(:repeated_sequence) do |value|
  [:star, value[1][:definitions_list]]
end
|
# Wraps the definitions list found in the second matched element as a
# sequence expression: [:seq, definitions_list].
production(:grouped_sequence) do |value|
  [:seq, value[1][:definitions_list]]
end
|
# Parser invocation.
#
# Reads the grammar text, parses it starting at the :syntax production using
# ISOEBNFMeta::RULES, and collects every rule reported by the parser in @ast.
#
# @param [#read, #to_s] input
#   Grammar source; read in full when it responds to #read, otherwise
#   converted with #to_s.
# @param [Hash{Symbol => Object}] options
#   Forwarded to `parse`.
# @option options [Integer] :level
#   Trace level. 0(debug), 1(info), 2(warn), 3(error). When present, a Logger
#   writing to STDERR at that level is installed as options[:logger].
# @note The block parameter is accepted but is not used by this method.
def initialize(input, **options, &block)
  # If the `level` option is set, instantiate a logger for trace output.
  if options.key?(:level)
    options[:logger] = Logger.new(STDERR)
    options[:logger].level = options[:level]
    options[:logger].formatter = lambda { |severity, _datetime, _progname, msg| "#{severity} #{msg}\n" }
  end

  # Read input, if necessary.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  # NOTE(review): this flag is never set to true in this method, so the
  # `kind = :terminal` assignment below never runs; confirm whether ISO EBNF
  # grammars are meant to mark terminal rules at all.
  parsing_terminals = false
  @ast = []

  # Whitespace is one or more of: a control/space character in
  # \x09-\x0d or \x20, or a `(* ... *)` comment.
  parse(@input,
        :syntax,
        ISOEBNFMeta::RULES,
        whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
        **options) do |context, *data|
    rule = case context
           when :rule
             # The first datum of a :rule event is the rule reported by the
             # syntax_rule production.
             reported = data.first
             reported.kind = :terminal if parsing_terminals
             reported
           end
    @ast << rule if rule
  end
  @ast
end
If the |
# Output rules as a formatted S-Expression.
#
# Each entry of @ast is converted with #for_sxp and the resulting list is
# rendered by SXP::Generator.string. The `sxp` library is already required at
# the top of the file, so no lazy require is needed here.
#
# NOTE(review): a displaced doc fragment ("Read input, if necessary, which
# will be used in a Scanner.") had been stranded inside this method; it
# describes the input handling in #initialize, not this method.
#
# @return [Object] whatever SXP::Generator.string returns (presumably a String)
def to_sxp
  SXP::Generator.string(@ast.map(&:for_sxp))
end
end |
A rule which has already been turned into a |
|
Output formatted S-Expression of grammar |
|
Output rules as a formatted S-Expression |
|