diff --git a/cfgrammar/src/lib/yacc/grammar.rs b/cfgrammar/src/lib/yacc/grammar.rs index bfefa12b6..d3ca4f19c 100644 --- a/cfgrammar/src/lib/yacc/grammar.rs +++ b/cfgrammar/src/lib/yacc/grammar.rs @@ -491,8 +491,8 @@ where } /// Get the action return type as defined by the user - pub fn actiontype(&self) -> &Option { - &self.actiontype + pub fn actiontype(&self) -> Option<&String> { + self.actiontype.as_ref() } /// Get the programs part of the grammar @@ -557,6 +557,22 @@ where } } + /// Returns the string representation of a given production `pidx`. + pub fn pp_prod(&self, pidx: PIdx) -> String { + let mut sprod = String::new(); + let ridx = self.prod_to_rule(pidx); + sprod.push_str(self.rule_name(ridx)); + sprod.push_str(":"); + for sym in self.prod(pidx) { + let s = match sym { + Symbol::Token(tidx) => self.token_name(*tidx).unwrap(), + Symbol::Rule(ridx) => self.rule_name(*ridx) + }; + sprod.push_str(&format!(" \"{}\"", s)); + } + sprod + } + /// Return a `SentenceGenerator` which can then generate minimal sentences for any rule /// based on the user-defined `token_cost` function which gives the associated cost for /// generating each token (where the cost must be greater than 0). 
Note that multiple diff --git a/lrpar/Cargo.toml b/lrpar/Cargo.toml index 52e732e80..616dd8292 100644 --- a/lrpar/Cargo.toml +++ b/lrpar/Cargo.toml @@ -29,3 +29,4 @@ vob = "2.0" regex = "1.0" [dev-dependencies] +temp_testdir = "0.2" diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs index 1a4bf942c..578938bea 100644 --- a/lrpar/src/lib/ctbuilder.rs +++ b/lrpar/src/lib/ctbuilder.rs @@ -35,7 +35,7 @@ use std::{ convert::AsRef, env::{current_dir, var}, error::Error, - fmt::Debug, + fmt::{self, Debug}, fs::{self, read_to_string, File}, hash::Hash, io::Write, @@ -49,7 +49,7 @@ use cfgrammar::{ Symbol }; use filetime::FileTime; -use lrtable::{from_yacc, Minimiser, StateGraph, StateTable}; +use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable}; use num_traits::{AsPrimitive, PrimInt, Unsigned}; use regex::Regex; use serde::{Deserialize, Serialize}; @@ -79,14 +79,72 @@ pub enum ActionKind { GenericParseTree } +struct CTConflictsError { + pub grm: YaccGrammar, + pub sgraph: StateGraph, + pub stable: StateTable +} + +impl fmt::Display for CTConflictsError +where + StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned, + usize: AsPrimitive, + u32: AsPrimitive +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let conflicts = self.stable.conflicts().unwrap(); + write!( + f, + "CTConflictsError{{{} Shift/Reduce, {} Reduce/Reduce}}", + conflicts.sr_len(), + conflicts.rr_len() + ) + } +} + +impl fmt::Debug for CTConflictsError +where + StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned, + usize: AsPrimitive, + u32: AsPrimitive +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let conflicts = self.stable.conflicts().unwrap(); + write!( + f, + "CTConflictsError{{{} Shift/Reduce, {} Reduce/Reduce}}", + conflicts.sr_len(), + conflicts.rr_len() + ) + } +} + +impl Error for CTConflictsError +where + StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + 
Unsigned, + usize: AsPrimitive, + u32: AsPrimitive +{ +} + /// A `CTParserBuilder` allows one to specify the criteria for building a statically generated /// parser. -pub struct CTParserBuilder { - // Anything stored in here almost certainly needs to be included as part of the rebuild_cache - // function below so that, if it's changed, the grammar is rebuilt. +pub struct CTParserBuilder +where + StorageT: Eq + Hash +{ + // Anything stored in here (except `conflicts` and `error_on_conflicts`) almost certainly needs + // to be included as part of the rebuild_cache function below so that, if it's changed, the + // grammar is rebuilt. recoverer: RecoveryKind, phantom: PhantomData, - actionkind: ActionKind + actionkind: ActionKind, + error_on_conflicts: bool, + conflicts: Option<( + YaccGrammar, + StateGraph, + StateTable + )> } impl CTParserBuilder { @@ -132,7 +190,9 @@ where CTParserBuilder { recoverer: RecoveryKind::MF, phantom: PhantomData, - actionkind: ActionKind::GenericParseTree + actionkind: ActionKind::GenericParseTree, + error_on_conflicts: true, + conflicts: None } } @@ -156,9 +216,9 @@ where /// If `StorageT` is not big enough to index the grammar's tokens, rules, or /// productions. pub fn process_file_in_src( - &self, + &mut self, srcp: &str - ) -> Result<(HashMap), Box> { + ) -> Result, Box> { let mut inp = current_dir()?; inp.push("src"); inp.push(srcp); @@ -173,6 +233,30 @@ where self } + /// If set to true, `process_file_in_src` will return an error if the given grammar contains + /// any Shift/Reduce or Reduce/Reduce conflicts. Defaults to `true`. + pub fn error_on_conflicts(mut self, b: bool) -> Self { + self.error_on_conflicts = b; + self + } + + /// If there are any conflicts in the grammar, return a tuple which allows users to inspect + /// and pretty print them; otherwise returns `None`. Note: The conflicts feature is currently + /// unstable and may change in the future. 
+ pub fn conflicts( + &self + ) -> Option<( + &YaccGrammar, + &StateGraph, + &StateTable, + &Conflicts + )> { + if let Some((grm, sgraph, stable)) = &self.conflicts { + return Some((grm, sgraph, stable, &stable.conflicts().unwrap())); + } + None + } + /// Statically compile the Yacc file `inp` into Rust, placing the output file(s) into /// the directory `outd`. The latter defines a module with the following functions: /// @@ -194,10 +278,10 @@ where /// If `StorageT` is not big enough to index the grammar's tokens, rules, or /// productions. pub fn process_file( - &self, + &mut self, inp: P, outd: Q - ) -> Result<(HashMap), Box> + ) -> Result, Box> where P: AsRef, Q: AsRef @@ -256,6 +340,15 @@ where fs::remove_file(&outp_rs).ok(); let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?; + + if stable.conflicts().is_some() && self.error_on_conflicts { + return Err(Box::new(CTConflictsError { + grm, + sgraph, + stable + })); + } + // Because we're lazy, we don't write our own serializer. 
We use serde and bincode to // create files $out_base.grm, $out_base.sgraph, and $out_base.out_stable which contain // binary versions of the relevant structs, and then include those binary files into the @@ -268,12 +361,12 @@ where // Header let mod_name = inp.as_ref().file_stem().unwrap().to_str().unwrap(); let actiontype = match grm.actiontype() { - Some(t) => t, + Some(t) => t.clone(), // Probably unneeded once NLL is in stable None => { match self.actionkind { ActionKind::CustomAction => panic!("Action return type not defined!"), ActionKind::GenericParseTree => { - "" // Dummy string that will never be used + String::new() // Dummy string that will never be used } } } @@ -475,6 +568,10 @@ where let mut f = File::create(outp_rs)?; f.write_all(outs.as_bytes())?; + + if stable.conflicts().is_some() { + self.conflicts = Some((grm, sgraph, stable)); + } Ok(rule_ids) } @@ -565,3 +662,69 @@ pub fn _reconstitute<'a, StorageT: Deserialize<'a> + Hash + PrimInt + Unsigned>( let stable = deserialize(stable_buf).unwrap(); (grm, sgraph, stable) } + +#[cfg(test)] +mod test { + extern crate temp_testdir; + use std::{fs::File, io::Write, path::PathBuf}; + + use self::temp_testdir::TempDir; + use super::{ActionKind, CTConflictsError, CTParserBuilder}; + + #[test] + fn test_conflicts() { + let temp = TempDir::default(); + let mut file_path = PathBuf::from(temp.as_ref()); + file_path.push("grm.y"); + let mut f = File::create(&file_path).unwrap(); + let _ = f.write_all( + "%start A +%% +A : 'a' 'b' | B 'b'; +B : 'a' | C; +C : 'a';" + .as_bytes() + ); + + let mut ct = CTParserBuilder::new() + .error_on_conflicts(false) + .action_kind(ActionKind::GenericParseTree); + ct.process_file_in_src(file_path.to_str().unwrap()).unwrap(); + + match ct.conflicts() { + Some((_, _, _, conflicts)) => { + assert_eq!(conflicts.sr_len(), 1); + assert_eq!(conflicts.rr_len(), 1); + } + None => panic!("Expected error data") + } + } + + #[test] + fn test_conflicts_error() { + let temp = 
TempDir::default(); + let mut file_path = PathBuf::from(temp.as_ref()); + file_path.push("grm.y"); + let mut f = File::create(&file_path).unwrap(); + let _ = f.write_all( + "%start A +%% +A : 'a' 'b' | B 'b'; +B : 'a' | C; +C : 'a';" + .as_bytes() + ); + + match CTParserBuilder::new() + .action_kind(ActionKind::GenericParseTree) + .process_file_in_src(file_path.to_str().unwrap()) + { + Ok(_) => panic!("Expected error"), + Err(e) => { + let cs = e.downcast_ref::>(); + assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1); + assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1); + } + } + } +} diff --git a/lrtable/src/lib/statetable.rs b/lrtable/src/lib/statetable.rs index 63749a0e0..ac8f76101 100644 --- a/lrtable/src/lib/statetable.rs +++ b/lrtable/src/lib/statetable.rs @@ -50,6 +50,68 @@ use stategraph::StateGraph; use StIdx; use StIdxStorageT; +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug)] +pub struct Conflicts { + reduce_reduce: Vec<(PIdx, PIdx, StIdx)>, + shift_reduce: Vec<(TIdx, PIdx, StIdx)> +} + +impl Conflicts +where + usize: AsPrimitive, + u32: AsPrimitive +{ + /// Return an iterator over all shift/reduce conflicts. + pub fn sr_conflicts(&self) -> impl Iterator, PIdx, StIdx)> { + self.shift_reduce.iter() + } + + /// Return an iterator over all reduce/reduce conflicts. + pub fn rr_conflicts(&self) -> impl Iterator, PIdx, StIdx)> { + self.reduce_reduce.iter() + } + + /// How many shift/reduce conflicts are there? + pub fn sr_len(&self) -> usize { + self.shift_reduce.len() + } + + /// How many reduce/reduce conflicts are there? + pub fn rr_len(&self) -> usize { + self.reduce_reduce.len() + } + + /// Returns a pretty-printed version of the conflicts. 
+ pub fn pp(&self, grm: &YaccGrammar) -> String { + let mut s = String::new(); + if self.sr_len() > 0 { + s.push_str("Shift/Reduce conflicts:\n"); + for (tidx, pidx, stidx) in self.sr_conflicts() { + s.push_str(&format!( + " State {:?}: Shift(\"{}\") / Reduce({})\n", + usize::from(*stidx), + grm.token_name(*tidx).unwrap(), + grm.pp_prod(*pidx) + )); + } + } + + if self.rr_len() > 0 { + s.push_str("Reduce/Reduce conflicts:\n"); + for (pidx, r_pidx, stidx) in self.rr_conflicts() { + s.push_str(&format!( + " State {:?}: Reduce({}) / Reduce({})\n", + usize::from(*stidx), + grm.pp_prod(*pidx), + grm.pp_prod(*r_pidx) + )); + } + } + s + } +} + /// The various different possible Yacc parser errors. #[derive(Debug)] pub enum StateTableErrorKind { @@ -95,10 +157,7 @@ pub struct StateTable { rules_len: RIdx, prods_len: PIdx, tokens_len: TIdx, - /// The number of reduce/reduce errors encountered. - pub reduce_reduce: u64, - /// The number of shift/reduce errors encountered. - pub shift_reduce: u64, + conflicts: Option>, pub final_state: StIdx } @@ -149,8 +208,9 @@ where assert!(usize::from(sg.all_states_len()) < (usize::from(StIdx::max_value()) - 1)); gotos.resize(maxg, 0); - let mut reduce_reduce = 0; // How many automatically resolved reduce/reduces were made? - let mut shift_reduce = 0; // How many automatically resolved shift/reduces were made? + // Store automatically resolved conflicts, so we can print them out later + let mut reduce_reduce = Vec::new(); + let mut shift_reduce = Vec::new(); let mut final_state = None; for (stidx, state) in sg @@ -186,10 +246,10 @@ where // By default, Yacc resolves reduce/reduce conflicts in favour // of the earlier production in the grammar. 
if pidx < r_pidx { - reduce_reduce += 1; + reduce_reduce.push((pidx, r_pidx, stidx)); actions[off as usize] = StateTable::encode(Action::Reduce(pidx)); } else if pidx > r_pidx { - reduce_reduce += 1; + reduce_reduce.push((r_pidx, pidx, stidx)); } } Action::Accept => { @@ -230,13 +290,15 @@ where match StateTable::decode(actions[off as usize]) { Action::Shift(x) => assert_eq!(*ref_stidx, x), Action::Reduce(r_pidx) => { - shift_reduce += resolve_shift_reduce( + resolve_shift_reduce( grm, &mut actions, off as usize, s_tidx, r_pidx, - *ref_stidx + *ref_stidx, + &mut shift_reduce, + stidx ); } Action::Accept => panic!("Internal error"), @@ -307,6 +369,14 @@ where let actions_sv = SparseVec::::from(&actions, 0, usize::from(grm.tokens_len())); let gotos_sv = SparseVec::::from(&gotos, 0, usize::from(grm.rules_len())); + let mut conflicts = None; + if !(reduce_reduce.is_empty() && shift_reduce.is_empty()) { + conflicts = Some(Conflicts { + reduce_reduce, + shift_reduce + }); + } + Ok(StateTable { actions: actions_sv, state_actions, @@ -317,8 +387,7 @@ where rules_len: grm.rules_len(), prods_len: grm.prods_len(), tokens_len: grm.tokens_len(), - reduce_reduce, - shift_reduce, + conflicts, final_state: final_state.unwrap() }) } @@ -424,6 +493,11 @@ where None => unreachable!() } } + + /// Return a struct containing all conflicts or `None` if there aren't any. 
+ pub fn conflicts(&self) -> Option<&Conflicts> { + self.conflicts.as_ref() + } } fn actions_offset( @@ -478,13 +552,13 @@ fn resolve_shift_reduce( off: usize, tidx: TIdx, pidx: PIdx, - stidx: StIdx -) -> u64 -where + stidx: StIdx, // State we want to shift to + shift_reduce: &mut Vec<(TIdx, PIdx, StIdx)>, + conflict_stidx: StIdx // State in which the conflict occurred +) where usize: AsPrimitive, u32: AsPrimitive { - let mut shift_reduce = 0; let tidx_prec = grm.token_precedence(tidx); let pidx_prec = grm.prod_precedence(pidx); match (tidx_prec, pidx_prec) { @@ -492,7 +566,7 @@ where // If the token and production don't both have precedences, we use Yacc's default // resolution, which is in favour of the shift. actions[off] = StateTable::encode(Action::Shift(stidx)); - shift_reduce += 1; + shift_reduce.push((tidx, pidx, conflict_stidx)); } (Some(token_prec), Some(prod_prec)) => { if token_prec.level == prod_prec.level { @@ -524,7 +598,6 @@ where // the reduce as-is. } } - shift_reduce } #[cfg(test)] @@ -870,6 +943,42 @@ mod test { Action::Reduce(grm.rule_to_prods(grm.rule_idx("Expr").unwrap())[0])); } + #[test] + fn conflicts() { + let grm = YaccGrammar::new( + YaccKind::Original, + &" +%start A +%% +A : 'a' 'b' | B 'b'; +B : 'a' | C; +C : 'a'; + " + ) + .unwrap(); + let sg = pager_stategraph(&grm); + let st = StateTable::new(&grm, &sg).unwrap(); + let conflicts = st.conflicts().unwrap(); + assert_eq!(conflicts.sr_len(), 1); + assert_eq!(conflicts.rr_len(), 1); + assert_eq!( + conflicts.sr_conflicts().next().unwrap(), + &( + grm.token_idx("b").unwrap(), + grm.rule_to_prods(grm.rule_idx("B").unwrap())[0], + StIdx::from(2) + ) + ); + assert_eq!( + conflicts.rr_conflicts().next().unwrap(), + &( + grm.rule_to_prods(grm.rule_idx("B").unwrap())[0], + grm.rule_to_prods(grm.rule_idx("C").unwrap())[0], + StIdx::from(2) + ) + ); + } + #[test] fn accept_reduce_conflict() { let grm = YaccGrammar::new( diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 
f5b40f6ff..482d22e19 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -159,6 +159,11 @@ fn main() { } }; + if let Some(c) = stable.conflicts() { + println!("{}", c.pp(&grm)); + println!("Stategraph:\n{}\n", sgraph.pp_core_states(&grm)); + } + { let rule_ids = grm .tokens_map()