Module: EBNF::LL1::Parser
- Included in:
- EBNFLL1Parser
- Defined in:
- lib/ebnf/ll1/parser.rb
Overview
A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
# Creating terminal definitions and parser rules to parse generated grammars
The parser is initialized to callbacks invoked on entry and exit to each terminal
and production
. A trivial parser loop can be described as follows:
require 'ebnf/ll1/parser'
require 'meta'
class Parser
include Meta
include EBNF::LL1::Parser
terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
# Add data based on scanned token to input
input[:symbol] = token.value
end
start_production(:rule) do |input, current, callback|
# Process on start of production
# Set state for entry into recursed rules through current
# Callback to parser loop with callback
end
production(:rule) do |input, current, callback|
# Process on end of production
# return results in input, retrieve results from recursed rules in current
# Callback to parser loop with callback
end
def initialize(input)
parse(input, start_symbol,
branch: BRANCH,
first: FIRST,
follow: FOLLOW,
cleanup: CLEANUP
) do |context, *data|
# Process calls from callback from productions
rescue ArgumentError, RDF::LL1::Parser::Error => e
progress("Parsing completed with errors:\n\t#{e.message}")
raise RDF::ReaderError, e.message if validate?
end
Defined Under Namespace
Modules: ClassMethods Classes: Error
Instance Attribute Summary collapse
-
#lineno ⇒ Integer
readonly
Line number of current token.
Instance Method Summary collapse
-
#add_prod_data(sym, *values) ⇒ Object
Add values to production data, values aranged as an array.
-
#add_prod_datum(sym, values) ⇒ Object
Add a single value to prod_data, allows for values to be an array.
-
#debug(node, message, **options) ⇒ Object
protected
Debug logging.
- #depth ⇒ Object
-
#error(node, message, **options) ⇒ Object
protected
Error information, used as level
3
logger messages. -
#parse(input = nil, start = nil, **options) {|context, *data| ... } ⇒ EBNF::LL1::Parser
Initializes a new parser instance.
-
#prod_data ⇒ Object
Current ProdData element.
-
#progress(node, message, **options, &block) ⇒ Object
protected
Progress logged when parsing.
-
#warn(node, message, **options) ⇒ Object
protected
Warning information, used as level
2
logger messages.
Instance Attribute Details
#lineno ⇒ Integer (readonly)
Returns line number of current token.
54 55 56 |
# File 'lib/ebnf/ll1/parser.rb', line 54 def lineno @lineno end |
Instance Method Details
#add_prod_data(sym, *values) ⇒ Object
Add values to production data, values aranged as an array
491 492 493 494 495 496 497 |
# File 'lib/ebnf/ll1/parser.rb', line 491 def add_prod_data(sym, *values) return if values.compact.empty? prod_data[sym] ||= [] prod_data[sym] += values debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"} end |
#add_prod_datum(sym, values) ⇒ Object
Add a single value to prod_data, allows for values to be an array
475 476 477 478 479 480 481 482 483 484 485 486 487 488 |
# File 'lib/ebnf/ll1/parser.rb', line 475 def add_prod_datum(sym, values) case values when Array prod_data[sym] ||= [] debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"} prod_data[sym] += values when nil return else prod_data[sym] ||= [] debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"} prod_data[sym] << values end end |
#debug(node, message, **options) ⇒ Object (protected)
Debug logging.
The call is ignored, unless @options[:logger]
is set.
579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 |
# File 'lib/ebnf/ll1/parser.rb', line 579 def debug(*args, &block) return unless @options[:logger] = args.last.is_a?(Hash) ? args.pop : {} lineno = @lineno || ([:token].lineno if [:token].respond_to?(:lineno)) level = .fetch(:level, 0) depth = [:depth] || self.depth if self.respond_to?(:log_debug) level = [:debug, :info, :warn, :error, :fatal][level] log_debug(*args, **.merge(level: level, lineno: lineno, depth: depth), &block) elsif @options[:logger].respond_to?(:add) args << yield if block_given? @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" ")) elsif @options[:logger].respond_to?(:<<) args << yield if block_given? @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ") end end |
#depth ⇒ Object
469 |
# File 'lib/ebnf/ll1/parser.rb', line 469 def depth; (@productions || []).length; end |
#error(node, message, **options) ⇒ Object (protected)
Error information, used as level 3
logger messages. Messages may be logged and are saved for reporting at end of parsing.
511 512 513 514 515 516 517 518 519 520 521 522 523 524 |
# File 'lib/ebnf/ll1/parser.rb', line 511 def error(node, , **) lineno = @lineno || ([:token].lineno if [:token].respond_to?(:lineno)) m = "ERROR " m += "[line: #{lineno}] " if lineno m += m += " (found #{[:token].inspect})" if [:token] m += ", production = #{[:production].inspect}" if [:production] @error_log << m unless @recovering @recovering = true debug(node, m, level: .fetch(:level, 3), **) if [:raise] || @options[:validate] raise Error.new(m, lineno: lineno, token: [:token], production: [:production]) end end |
#parse(input = nil, start = nil, **options) {|context, *data| ... } ⇒ EBNF::LL1::Parser
Initializes a new parser instance.
Attempts to recover from errors.
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 |
# File 'lib/ebnf/ll1/parser.rb', line 266 def parse(input = nil, start = nil, **, &block) @options = .dup @branch = [:branch] @first = [:first] ||= {} @follow = [:follow] ||= {} @cleanup = [:cleanup] ||= {} @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, **@options) @productions = [] @parse_callback = block @recovering = false @error_log = [] terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching # Unrecoverable errors raise Error, "Branch table not defined" unless @branch && @branch.length > 0 raise Error, "Starting production not defined" unless start @prod_data = [{}] start = start.split('#').last.to_sym unless start.is_a?(Symbol) todo_stack = [{prod: start, terms: nil}] while !todo_stack.empty? begin @recovering = false pushed = false if todo_stack.last[:terms].nil? todo_stack.last[:terms] = [] cur_prod = todo_stack.last[:prod] # If cur_prod is the starting production, we can reset the stack # to the beginning to avoid excessive growth in the production # stack if [:reset_on_start] && cur_prod == start todo_stack = [{prod: start, terms: []}] @productions = [] @prod_data = [{}] end # Fetch the current token token = get_token(:recover) # At this point, token is either nil, in the first set of the production, # or in the follow set of this production or any previous production debug("parse(production)") do "token #{token ? token.representation.inspect : 'nil'}, " + "prod #{cur_prod.inspect}, " + "depth #{depth}" end # Got an opened production onStart(cur_prod) if token.nil? if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof)) # End of file, and production does not contain eps, or it does, but follow does not contain eof error("parse(production)", "Unexpected end of input", lineno: lineno, production: cur_prod, raise: true) else debug("parse(production)") {"End of input prod #{cur_prod.inspect}"} end elsif prod_branch = @branch[cur_prod] sequence = prod_branch.fetch(token.representation) do error("parse(production)", "Expected one of #{@first[cur_prod].inspect}", token: token, production: cur_prod, raise: true) end debug("parse(production)") do "token #{token.representation.inspect} " + "prod #{cur_prod.inspect}, " + "prod_branch #{prod_branch.keys.inspect}, " + "sequence #{sequence.inspect}" end todo_stack.last[:terms] += sequence else error("parse(production)", "Unexpected", token: token, production: cur_prod, raise: true) end end debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"} while !todo_stack.last[:terms].to_a.empty? # Get the next term in this sequence term = todo_stack.last[:terms].shift debug("parse(token)") {"accept #{term.inspect}"} if token = accept(term) debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"} onTerminal(term, token) elsif terminals.include?(term) # If term is a terminal, then it is an error if token does not # match it error("parse(token)", "Expected #{term.inspect}", token: get_token, production: cur_prod, raise: true) else token = get_token # If token is not in firsts of term, but eps is, skip to next # term if first_include?(term, :_eps) && !first_include?(term, token) debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"} break else # Push term onto stack todo_stack << {prod: term, terms: nil} debug("parse(push)") {"term #{term.inspect}, depth #{depth}"} pushed = true break end end end rescue Lexer::Error, Error => e # Lexer encountered an illegal token or the parser encountered # a terminal which is inappropriate for the current production. # Perform error recovery to find a reasonable terminal based # on the follow sets of the relevant productions. This includes # remaining terms from the current production and the stacked # productions @lineno = e.lineno if e.is_a?(Lexer::Error) # Skip to the next valid terminal @lexer.recover error("parse(#{e.class})", "With input '#{e.input}': #{e.}", production: @productions.last, token: e.token) else # Otherwise, the terminal is fine, just not for this production. @lexer.shift error("parse(#{e.class})", "#{e.}", production: @productions.last, token: e.token) end # Get the list of follows for this sequence, this production and the stacked productions. debug("recovery", "stack follows:") todo_stack.reverse.each do |todo| debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"} end # Find all follows to the top of the stack follows = todo_stack.inject([]) do |follow, todo| prod = todo[:prod] follow += @follow[prod] || [] end.uniq debug("recovery") {"follows: #{follows.inspect}"} # Skip tokens until one is found in follows while (token = get_token(:recover)) && follows.none? {|t| token === t} skipped = @lexer.shift progress("recovery") {"skip #{skipped.inspect}"} end debug("recovery") {"found #{token.inspect} in follows"} # Pop stack elements until token is in follows while !todo_stack.empty? && !follow_include?(todo_stack.last[:prod], token || :_eof) debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"} todo_stack.pop onFinish end # Token is now in the first of the top production unless todo_stack.empty? todo_stack.pop onFinish end if todo_stack.empty? # Recovered to end of last production warn("recover", "recovered to end of productions") else warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}") end @recovering = false ensure # After completing the last production in a sequence, pop down until we find a production # # If in recovery mode, continue popping until we find a term with a follow list while !pushed && !todo_stack.empty? && todo_stack.last.fetch(:terms, []).empty? debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"} todo_stack.pop onFinish end end end error("parse(eof)", "Finished processing before end of file", token: @lexer.first) if @lexer.first # Continue popping contexts off of the stack while !todo_stack.empty? debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"} # There can't be anything left to do, or if there is, it must be optional last_terms = todo_stack.last[:terms] if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)} error("parse(eof)", "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}" ) end todo_stack.pop onFinish end # When all is said and done, raise the error log unless @error_log.empty? raise Error, @error_log.join("\n") end end |
#prod_data ⇒ Object
Current ProdData element
472 |
# File 'lib/ebnf/ll1/parser.rb', line 472 def prod_data; @prod_data.last; end |
#progress(node, message, **options, &block) ⇒ Object (protected)
Progress logged when parsing. Passed as level 1
logger messages.
The call is ignored, unless @options[:logger]
is set.
559 560 561 562 563 564 565 566 |
# File 'lib/ebnf/ll1/parser.rb', line 559 def progress(node, *args, &block) return unless @options[:logger] lineno = @lineno || ([:token].lineno if [:token].respond_to?(:lineno)) args << {} unless args.last.is_a?(Hash) args.last[:level] ||= 1 args.last[:lineno] ||= lineno debug(node, *args, &block) end |
#warn(node, message, **options) ⇒ Object (protected)
Warning information, used as level 2
logger messages. Messages may be logged and are saved for reporting at end of parsing.
536 537 538 539 540 541 542 543 544 545 |
# File 'lib/ebnf/ll1/parser.rb', line 536 def warn(node, , **) lineno = @lineno || ([:token].lineno if [:token].respond_to?(:lineno)) m = "WARNING " m += "[line: #{lineno}] " if lineno m += m += " (found #{[:token].inspect})" if [:token] m += ", production = #{[:production].inspect}" if [:production] @error_log << m unless @recovering debug(node, m, level: 2, lineno: lineno, **) end |