From 7bd9e90988a9f646ca452e31aa3d80d7e46f710e Mon Sep 17 00:00:00 2001 From: Specy Date: Sun, 17 Nov 2024 13:52:25 +0100 Subject: [PATCH] fix: correctly construct first and follow tables --- assets/grammars/correct/indirect_empty.lr | 7 ++ src/tables.rs | 6 +- tests/common.rs | 1 + tests/parser.rs | 91 +++++++++++++++++++++++ 4 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 assets/grammars/correct/indirect_empty.lr diff --git a/assets/grammars/correct/indirect_empty.lr b/assets/grammars/correct/indirect_empty.lr new file mode 100644 index 0000000..1da7e84 --- /dev/null +++ b/assets/grammars/correct/indirect_empty.lr @@ -0,0 +1,7 @@ +S -> A B + +A -> C +C -> D +D -> '' + +B -> 'x' diff --git a/src/tables.rs b/src/tables.rs index 04b509e..3769e1a 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -12,6 +12,7 @@ impl FirstTable { /// Constructs the first table from the grammar. pub fn construct(grammar: &Grammar) -> FirstTable { let mut first_table = IndexMap::new(); + let mut indirectly_empty_symbols = IndexSet::new(); let mut done = false; while !done { @@ -32,7 +33,9 @@ impl FirstTable { .cloned(), ); } - if !grammar.empty_symbols().contains(symbol) { + if !grammar.empty_symbols().contains(symbol) + && !indirectly_empty_symbols.contains(symbol) + { break; } }, @@ -42,6 +45,7 @@ impl FirstTable { }, } if index == rule.pattern().len() - 1 { + indirectly_empty_symbols.insert(rule.symbol()); possible_first_tokens.insert(Token::Empty); } } diff --git a/tests/common.rs b/tests/common.rs index d7cc9cd..2c5d171 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -12,6 +12,7 @@ pub mod grammars { pub const G9: &str = include_str!("../assets/grammars/correct/g9.lr"); pub const G10: &str = include_str!("../assets/grammars/correct/g10.lr"); pub const G11: &str = include_str!("../assets/grammars/correct/g11.lr"); + pub const INDIRECT_EMPTY: &str = include_str!("../assets/grammars/correct/indirect_empty.lr"); pub const JSON: &str = 
include_str!("../assets/grammars/correct/json.lr"); pub const NOT_LALR: &str = include_str!("../assets/grammars/correct/not-lalr.lr"); pub const OPTIONAL: &str = include_str!("../assets/grammars/correct/optional.lr"); diff --git a/tests/parser.rs b/tests/parser.rs index 5bdc2c7..936587e 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -144,6 +144,97 @@ fn raising_correct_error_when_creating_lalr_parser_for_non_lalr_grammar() { } +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn correctly_creating_first_and_follow_sets_for_indirectly_empty_grammar() { + let grammar = Grammar::parse(common::grammars::INDIRECT_EMPTY).unwrap(); + let parser = Parser::lr(grammar).unwrap(); + + let first_table = parser.first_table(); + { + // +--------+-----------+ + // | Symbol | First Set | + // +--------+-----------+ + // | C | { ε } | + // | D | { ε } | + // | B | { 'x' } | + // | A | { ε } | + // | S | { 'x' } | + // +--------+-----------+ + + #[rustfmt::skip] + assert_eq!( + *first_table.deref(), + [ + ( + Symbol::from("C"), + [Token::Empty.into()].into(), + ), + ( + Symbol::from("D"), + [Token::Empty.into()].into(), + ), + ( + Symbol::from("B"), + [ConstantToken::from("x").into()].into(), + ), + ( + Symbol::from("A"), + [Token::Empty.into()].into(), + ), + ( + Symbol::from("S"), + [ConstantToken::from("x").into()].into(), + ) + ] + .into_iter() + .collect::<IndexMap<_, _>>() + ); + } + + let follow_table = parser.follow_table(); + { + // +--------+------------+ + // | Symbol | Follow Set | + // +--------+------------+ + // | C | { 'x' } | + // | D | { 'x' } | + // | B | { $ } | + // | A | { 'x' } | + // | S | { $ } | + // +--------+------------+ + + #[rustfmt::skip] + assert_eq!( + *follow_table.deref(), + [ + ( + Symbol::from("C"), + [ConstantToken::from("x").into()].into(), + ), + ( + Symbol::from("D"), + [ConstantToken::from("x").into()].into(), + ), + ( + Symbol::from("B"), + [Token::Eof].into(), + ), + ( + Symbol::from("A"), + 
[ConstantToken::from("x").into()].into(), + ), + ( + Symbol::from("S"), + [Token::Eof].into(), + ), + ] + .into_iter() + .collect::<IndexMap<_, _>>() + ); + } +} + #[test] #[cfg_attr(target_family = "wasm", wasm_bindgen_test)] fn correctly_creating_lr_parser_for_binary_addition_grammar() {