From bc6ea34fd3ea6cb77951f4856b7f665457952cee Mon Sep 17 00:00:00 2001
From: Junichi Kobayashi
Date: Mon, 18 Sep 2023 17:29:56 +0900
Subject: [PATCH] Update parser

Move the `$`/`@` reference scanning out of NewLexer#lex_c_code into
Grammar#extract_references, which the generated parser now calls right
after do_parse.

Register %token declarations from the grammar actions, build real lists
for token_declaration_list and token_declaration_list_for_precedence,
convert INTEGER to an Integer, and bump the precedence number once per
token group of a %left declaration instead of once per token.

Run the comparison spec against sample/ruby_parse.y as well as
sample/calc.y.

---
Review notes (not part of the commit message):

* Tags found by Grammar#extract_references are built with
  Lrama::Lexer::Token.new, the same way the parser.y actions in this
  patch build them; the earlier create_token(...) call referenced
  `line` and `str`, which are not defined in that method.
* /@(\d)+/ kept only the last digit of a location reference (@12 => 2);
  it is now /@(\d+)/.
* The leftover `pp @line` debug print in NewLexer#next_token is dropped.
* Indentation was reconstructed from a whitespace-collapsed copy of the
  patch and may need adjusting before it applies cleanly.

 lib/lrama/grammar.rb          | 40 ++++++++++++++++++++++++
 lib/lrama/new_lexer.rb        | 10 ------
 lib/lrama/new_parser.rb       | 38 ++++++++++++++++-------
 parser.y                      | 17 +++++-----
 spec/lrama/new_parser_spec.rb | 58 +++++++++++++++++++++++++----------
 5 files changed, 117 insertions(+), 46 deletions(-)

diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb
index 81df3996..8a054d3e 100644
--- a/lib/lrama/grammar.rb
+++ b/lib/lrama/grammar.rb
@@ -306,6 +306,46 @@ def nterms
       @nterms ||= @symbols.select(&:nterm?)
     end
 
+    # Scans every user-code token on the right-hand side of each rule in
+    # @_rules for `$` and `@` references, stores them on the token as
+    # [kind, target, tag, first_offset, last_offset] entries (offsets are
+    # positions inside token.s_value) and then passes the token to
+    # build_references.
+    def extract_references
+      @_rules.each do |_lhs, rhs, _|
+        rhs.each do |token|
+          next if token.type != Lrama::Lexer::Token::User_code
+
+          scanner = StringScanner.new(token.s_value)
+          references = []
+
+          while !scanner.eos? do
+            start = scanner.pos
+            case
+            when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<tag>$
+              tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+              references << [:dollar, "$", tag, start, scanner.pos - 1]
+            when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<tag>1
+              tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+              references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+            when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<tag>program
+              tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+              references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+            when scanner.scan(/@\$/) # @$
+              references << [:at, "$", nil, start, scanner.pos - 1]
+            when scanner.scan(/@(\d+)/) # @1, @12
+              references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+            else
+              scanner.getch
+            end
+          end
+
+          token.references = references
+          build_references(token)
+        end
+      end
+    end
+
     private
 
     def find_nterm_by_id!(id)
diff --git a/lib/lrama/new_lexer.rb b/lib/lrama/new_lexer.rb
index d50510c7..c6d78e2b 100644
--- a/lib/lrama/new_lexer.rb
+++ b/lib/lrama/new_lexer.rb
@@ -100,16 +100,6 @@ def lex_c_code
           @head = @scanner.pos + 1
         when @scanner.scan(/"/)
           code += %Q("#{@scanner.scan_until(/"/)[0..-2]}")
-        when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $$
-          references << [:dollar, "$", tag, str.length, str.length + @scanner[0].length - 1]
-        when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $1
-          references << [:dollar, Integer(@scanner[2]), tag, str.length, str.length + @scanner[0].length - 1]
-        when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $program
-          references << [:dollar, @scanner[2], tag, str.length, str.length + @scanner[0].length - 1]
-        when @scanner.scan(/@\$/) # @$
-          references << [:at, "$", nil, str.length, str.length + @scanner[0].length - 1]
-        when @scanner.scan(/@(\d)+/) # @1
-          references << [:at, Integer(@scanner[1]), nil, str.length, str.length + @scanner[0].length - 1]
         else
           code += @scanner.getch
         end
diff --git a/lib/lrama/new_parser.rb b/lib/lrama/new_parser.rb
index 747d9447..ec085178 100644
--- a/lib/lrama/new_parser.rb
+++ b/lib/lrama/new_parser.rb
@@ -20,6 +20,7 @@ def parse
   @grammar = Lrama::Grammar.new
   @precedence_number = 0
   do_parse
+  @grammar.extract_references
   @grammar.prepare
   @grammar.compute_nullable
   @grammar.validate!
@@ -172,12 +173,12 @@ def next_token
   2, 51, :_reduce_none,
   1, 58, :_reduce_33,
   2, 58, :_reduce_34,
-  3, 58, :_reduce_none,
+  3, 58, :_reduce_35,
   1, 61, :_reduce_36,
   2, 61, :_reduce_37,
   3, 62, :_reduce_38,
   0, 64, :_reduce_none,
-  1, 64, :_reduce_none,
+  1, 64, :_reduce_40,
   0, 65, :_reduce_none,
   1, 65, :_reduce_none,
   1, 65, :_reduce_none,
@@ -197,7 +198,7 @@ def next_token
   2, 60, :_reduce_57,
   2, 60, :_reduce_58,
   1, 72, :_reduce_59,
-  2, 72, :_reduce_none,
+  2, 72, :_reduce_60,
   1, 73, :_reduce_none,
   1, 63, :_reduce_62,
   1, 63, :_reduce_63,
@@ -512,7 +513,7 @@ def _reduce_29(val, _values, result)
 
 module_eval(<<'.,.,', 'parser.y', 29)
   def _reduce_30(val, _values, result)
-     val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number); @precedence_number += 1 } }
+     val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number) }; @precedence_number += 1 }
     result
   end
 .,.,
@@ -523,30 +524,35 @@ def _reduce_30(val, _values, result)
 
 module_eval(<<'.,.,', 'parser.y', 33)
   def _reduce_33(val, _values, result)
-     result = val[0]
+     val[0].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: nil, replace: true) }
     result
   end
 .,.,
 
 module_eval(<<'.,.,', 'parser.y', 34)
   def _reduce_34(val, _values, result)
-     result = val
+     val[1].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[0]), replace: true) }
     result
   end
 .,.,
 
-# reduce 35 omitted
+module_eval(<<'.,.,', 'parser.y', 35)
+  def _reduce_35(val, _values, result)
+     val[2].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[1]), replace: true) }
+    result
+  end
+.,.,
 
 module_eval(<<'.,.,', 'parser.y', 37)
   def _reduce_36(val, _values, result)
-     result = val
+     result = [val[0]]
     result
   end
 .,.,
 
 module_eval(<<'.,.,', 'parser.y', 38)
   def _reduce_37(val, _values, result)
-     result = val
+     result = val[0].append(val[1])
     result
   end
 .,.,
@@ -560,7 +566,12 @@ def _reduce_38(val, _values, result)
 
 # reduce 39 omitted
 
-# reduce 40 omitted
+module_eval(<<'.,.,', 'parser.y', 43)
+  def _reduce_40(val, _values, result)
+     result = Integer(val[0])
+    result
+  end
+.,.,
 
 # reduce 41 omitted
 
@@ -655,7 +666,12 @@ def _reduce_59(val, _values, result)
   end
 .,.,
 
-# reduce 60 omitted
+module_eval(<<'.,.,', 'parser.y', 66)
+  def _reduce_60(val, _values, result)
+     result = val[0].append(val[1])
+    result
+  end
+.,.,
 
 # reduce 61 omitted
 
diff --git a/parser.y b/parser.y
index a3cb1a1a..cbbee083 100644
--- a/parser.y
+++ b/parser.y
@@ -27,21 +27,21 @@ rule
 
   symbol_declaration: "%token" token_declarations
                     | "%type" symbol_declarations { val[1][:tokens].each {|id| @grammar.add_type(id: id, tag: val[1][:tag]) } }
-                    | "%left" token_declarations_for_precedence { val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number); @precedence_number += 1 } } }
+                    | "%left" token_declarations_for_precedence { val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number) }; @precedence_number += 1 } }
                     | "%right" token_declarations_for_precedence
                     | "%nonassoc" token_declarations_for_precedence
 
-  token_declarations: token_declaration_list { result = val[0] }
-                    | TAG token_declaration_list { result = val }
-                    | token_declarations TAG token_declaration_list
+  token_declarations: token_declaration_list { val[0].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: nil, replace: true) } }
+                    | TAG token_declaration_list { val[1].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[0]), replace: true) } }
+                    | token_declarations TAG token_declaration_list { val[2].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[1]), replace: true) } }
 
-  token_declaration_list: token_declaration { result = val }
-                        | token_declaration_list token_declaration { result = val }
+  token_declaration_list: token_declaration { result = [val[0]] }
+                        | token_declaration_list token_declaration { result = val[0].append(val[1]) }
 
   token_declaration: id int_opt alias { result = val }
 
   int_opt: # empty
-         | INTEGER
+         | INTEGER { result = Integer(val[0]) }
 
   alias: # empty
        | string_as_id
@@ -64,7 +64,7 @@ rule
                                    | token_declarations_for_precedence token_declaration_list_for_precedence { result = val[0].concat({tag: nil, tokens: val[1]}) }
 
   token_declaration_list_for_precedence: token_declaration_for_precedence { result = [val[0]] }
-                                       | token_declaration_list_for_precedence token_declaration_for_precedence
+                                       | token_declaration_list_for_precedence token_declaration_for_precedence { result = val[0].append(val[1]) }
 
   token_declaration_for_precedence: id
 
@@ -123,6 +123,7 @@ def parse
   @grammar = Lrama::Grammar.new
   @precedence_number = 0
   do_parse
+  @grammar.extract_references
   @grammar.prepare
   @grammar.compute_nullable
   @grammar.validate!
diff --git a/spec/lrama/new_parser_spec.rb b/spec/lrama/new_parser_spec.rb
index 3336aaaf..c52a9f08 100644
--- a/spec/lrama/new_parser_spec.rb
+++ b/spec/lrama/new_parser_spec.rb
@@ -2,23 +2,47 @@
   describe '#parse' do
     subject { described_class.new(grammar).parse }
 
-    let(:grammar) { File.read('sample/calc.y') }
-
-    it 'returns the same results as Lrama::Parser' do
-      expected = Lrama::Parser.new(grammar).parse
-
-      expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
-      expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
-      expect(subject.union.code).to eq(expected.union.code)
-      expect(subject.union.lineno).to eq(expected.union.lineno)
-      expect(subject.union).to eq(expected.union)
-      expect(subject.types).to eq(expected.types)
-
-      subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
-      expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
-      expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
-      expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)
-      expect(subject).to eq(expected)
+    describe 'sample/calc.y' do
+      let(:grammar) { File.read('sample/calc.y') }
+
+      it 'returns the same results as Lrama::Parser' do
+        expected = Lrama::Parser.new(grammar).parse
+
+        expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
+        expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
+        expect(subject.union.code).to eq(expected.union.code)
+        expect(subject.union.lineno).to eq(expected.union.lineno)
+        expect(subject.union).to eq(expected.union)
+        expect(subject.types).to eq(expected.types)
+        subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
+        expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
+        expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
+        expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)
+
+        subject.rules.each_with_index {|rule, i| expect(rule).to eq(expected.rules[i]) }
+      end
+    end
+
+    describe 'sample/ruby_parse.y' do
+      let(:grammar) { File.read('sample/ruby_parse.y') }
+
+      it 'returns the same results as Lrama::Parser' do
+        expected = Lrama::Parser.new(grammar).parse
+
+        expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
+        expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
+        expect(subject.union.code).to eq(expected.union.code)
+        expect(subject.union.lineno).to eq(expected.union.lineno)
+        expect(subject.union).to eq(expected.union)
+        expect(subject.types).to eq(expected.types)
+        subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
+        expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
+        expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
+        expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)
+
+        subject.rules.each_with_index {|rule, i| expect(rule).to eq(expected.rules[i]) }
+        expect(subject).to eq(expected)
+      end
     end
   end
 end