Skip to content

Commit

Permalink
Update parser
Browse files Browse the repository at this point in the history
  • Loading branch information
junk0612 committed Sep 18, 2023
1 parent a1c2371 commit bc6ea34
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 46 deletions.
35 changes: 35 additions & 0 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,41 @@ def nterms
@nterms ||= @symbols.select(&:nterm?)
end

# Scans every user-code token on the right-hand side of each rule in @_rules
# for Bison-style references, stores the result on the token via
# `token.references=`, and then passes the token to `build_references`.
#
# Recognised forms: $$, $<tag>$, $1, $<tag>1, $name, $<tag>name, @$ and @1.
#
# Each reference is recorded as
#   [kind, value, tag, first_pos, last_pos]
# where
#   kind      is :dollar or :at,
#   value     is "$", an Integer index, or the referenced name (String),
#   tag       is a Tag token built from the "<tag>" text, or nil if absent,
#   first_pos / last_pos are the inclusive scanner positions of the
#             reference inside the token's s_value.
#
# NOTE(review): StringScanner#pos is a byte offset. If consumers index the
# code by character, positions differ once the user code contains multibyte
# characters -- confirm against the code that reads `references`.
def extract_references
  @_rules.each do |_lhs, rhs, _|
    rhs.each do |token|
      next unless token.type == Lrama::Lexer::Token::User_code

      scanner = StringScanner.new(token.s_value)
      references = []

      # Builds the Tag token for an optional "<tag>" capture (nil when the
      # reference carries no tag). Uses the keyword constructor because no
      # line/column information is available in this method.
      build_tag = lambda do |raw|
        raw && Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: raw)
      end

      until scanner.eos?
        start = scanner.pos
        case
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
          references << [:dollar, "$", build_tag.call(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
          references << [:dollar, Integer(scanner[2]), build_tag.call(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
          references << [:dollar, scanner[2], build_tag.call(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/@\$/) # @$
          references << [:at, "$", nil, start, scanner.pos - 1]
        when scanner.scan(/@(\d+)/) # @1, @12
          # The quantifier must sit inside the group: with /@(\d)+/ the
          # capture keeps only the last digit, so "@12" would become 2.
          references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
        else
          # Not a reference: skip a single character and keep scanning.
          scanner.getch
        end
      end

      token.references = references
      build_references(token)
    end
  end
end

private

def find_nterm_by_id!(id)
Expand Down
11 changes: 1 addition & 10 deletions lib/lrama/new_lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def initialize(text)
def next_token
case @status
when :initial
pp @line
lex_token
when :c_declaration
lex_c_code
Expand Down Expand Up @@ -100,16 +101,6 @@ def lex_c_code
@head = @scanner.pos + 1
when @scanner.scan(/"/)
code += %Q("#{@scanner.scan_until(/"/)[0..-2]}")
when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
references << [:dollar, "$", tag, str.length, str.length + @scanner[0].length - 1]
when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
references << [:dollar, Integer(@scanner[2]), tag, str.length, str.length + @scanner[0].length - 1]
when @scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
references << [:dollar, @scanner[2], tag, str.length, str.length + @scanner[0].length - 1]
when @scanner.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + @scanner[0].length - 1]
when @scanner.scan(/@(\d)+/) # @1
references << [:at, Integer(@scanner[1]), nil, str.length, str.length + @scanner[0].length - 1]
else
code += @scanner.getch
end
Expand Down
38 changes: 27 additions & 11 deletions lib/lrama/new_parser.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 9 additions & 8 deletions parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,21 @@ rule

symbol_declaration: "%token" token_declarations
| "%type" symbol_declarations { val[1][:tokens].each {|id| @grammar.add_type(id: id, tag: val[1][:tag]) } }
| "%left" token_declarations_for_precedence { val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number); @precedence_number += 1 } } }
| "%left" token_declarations_for_precedence { val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id); @grammar.add_left(sym, @precedence_number) }; @precedence_number += 1 } }
| "%right" token_declarations_for_precedence
| "%nonassoc" token_declarations_for_precedence

token_declarations: token_declaration_list { result = val[0] }
| TAG token_declaration_list { result = val }
| token_declarations TAG token_declaration_list
token_declarations: token_declaration_list { val[0].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: nil, replace: true) } }
| TAG token_declaration_list { val[1].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[0]), replace: true) } }
| token_declarations TAG token_declaration_list { val[2].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: val[1]), replace: true) } }

token_declaration_list: token_declaration { result = val }
| token_declaration_list token_declaration { result = val }
token_declaration_list: token_declaration { result = [val[0]] }
| token_declaration_list token_declaration { result = val[0].append(val[1]) }

token_declaration: id int_opt alias { result = val }

int_opt: # empty
| INTEGER
| INTEGER { result = Integer(val[0]) }

alias: # empty
| string_as_id
Expand All @@ -64,7 +64,7 @@ rule
| token_declarations_for_precedence token_declaration_list_for_precedence { result = val[0].concat({tag: nil, tokens: val[1]}) }

token_declaration_list_for_precedence: token_declaration_for_precedence { result = [val[0]] }
| token_declaration_list_for_precedence token_declaration_for_precedence
| token_declaration_list_for_precedence token_declaration_for_precedence { result = val[0].append(val[1]) }

token_declaration_for_precedence: id

Expand Down Expand Up @@ -123,6 +123,7 @@ def parse
@grammar = Lrama::Grammar.new
@precedence_number = 0
do_parse
@grammar.extract_references
@grammar.prepare
@grammar.compute_nullable
@grammar.validate!
Expand Down
58 changes: 41 additions & 17 deletions spec/lrama/new_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,47 @@
describe '#parse' do
subject { described_class.new(grammar).parse }

let(:grammar) { File.read('sample/calc.y') }

it 'returns the same results as Lrama::Parser' do
expected = Lrama::Parser.new(grammar).parse

expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
expect(subject.union.code).to eq(expected.union.code)
expect(subject.union.lineno).to eq(expected.union.lineno)
expect(subject.union).to eq(expected.union)
expect(subject.types).to eq(expected.types)

subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)
expect(subject).to eq(expected)
describe 'sample/calc.y' do
let(:grammar) { File.read('sample/calc.y') }

it 'returns the same results as Lrama::Parser' do
expected = Lrama::Parser.new(grammar).parse

expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
expect(subject.union.code).to eq(expected.union.code)
expect(subject.union.lineno).to eq(expected.union.lineno)
expect(subject.union).to eq(expected.union)
expect(subject.types).to eq(expected.types)
subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)

subject.rules.each_with_index {|rule, i| expect(rule).to eq(expected.rules[i]) }
end
end

describe 'sample/ruby_parse.y' do
let(:grammar) { File.read('sample/ruby_parse.y') }

it 'returns the same results as Lrama::Parser' do
expected = Lrama::Parser.new(grammar).parse

expect(subject.aux.prologue_first_lineno).to eq(expected.aux.prologue_first_lineno)
expect(subject.aux.prologue.strip).to eq(expected.aux.prologue.strip)
expect(subject.union.code).to eq(expected.union.code)
expect(subject.union.lineno).to eq(expected.union.lineno)
expect(subject.union).to eq(expected.union)
expect(subject.types).to eq(expected.types)
subject.instance_variable_get(:@_rules).each_with_index {|rule, id| expect(rule).to eq(expected.instance_variable_get(:@_rules)[id]) }
expect(subject.instance_variable_get(:@_rules)).to eq(expected.instance_variable_get(:@_rules))
expect(subject.aux.epilogue_first_lineno).to eq(expected.aux.epilogue_first_lineno)
expect(subject.aux.epilogue.strip).to eq(expected.aux.epilogue.strip)

subject.rules.each_with_index {|rule, i| expect(rule).to eq(expected.rules[i]) }
expect(subject).to eq(expected)
end
end
end
end

0 comments on commit bc6ea34

Please sign in to comment.