From 1d4dc4624f94e59e65e6243158086fd3ae5555bd Mon Sep 17 00:00:00 2001 From: Tomas Tauber <2410580+tomtau@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:53:48 +0800 Subject: [PATCH] add validation for tags on silent rules (fixes #1035) --- debugger/Cargo.toml | 8 +-- derive/Cargo.toml | 6 +- generator/Cargo.toml | 6 +- grammars/Cargo.toml | 6 +- meta/Cargo.toml | 4 +- meta/src/validator.rs | 117 +++++++++++++++++++++++++++++++++++++++ pest/Cargo.toml | 2 +- pest/examples/parens.rs | 1 + pest/src/error.rs | 37 ++++++------- pest/src/parser_state.rs | 1 + vm/Cargo.toml | 6 +- 11 files changed, 156 insertions(+), 38 deletions(-) diff --git a/debugger/Cargo.toml b/debugger/Cargo.toml index 19c3c6ee..fb72fcba 100644 --- a/debugger/Cargo.toml +++ b/debugger/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_debugger" description = "pest grammar debugger" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = [ "Dragoș Tiselice ", @@ -17,9 +17,9 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.11" } -pest_meta = { path = "../meta", version = "2.7.11" } -pest_vm = { path = "../vm", version = "2.7.11" } +pest = { path = "../pest", version = "2.7.12" } +pest_meta = { path = "../meta", version = "2.7.12" } +pest_vm = { path = "../vm", version = "2.7.12" } reqwest = { version = "= 0.11.13", default-features = false, features = [ "blocking", "json", diff --git a/derive/Cargo.toml b/derive/Cargo.toml index d1962030..8a76e828 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_derive" description = "pest's derive macro" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -25,5 +25,5 @@ grammar-extras = ["pest_generator/grammar-extras"] [dependencies] # for tests, included transitively anyway -pest = { path = "../pest", version = "2.7.11", default-features = false } -pest_generator = { path = "../generator", version = "2.7.11", default-features = false } +pest = { path = "../pest", version = "2.7.12", default-features = false } +pest_generator = { path = "../generator", version = "2.7.12", default-features = false } diff --git a/generator/Cargo.toml b/generator/Cargo.toml index 3fb8ebf8..41706bd0 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -22,8 +22,8 @@ grammar-extras = ["pest_meta/grammar-extras"] export-internal = [] [dependencies] -pest = { path = "../pest", version = "2.7.11", default-features = false } -pest_meta = { path = "../meta", version = "2.7.11" } +pest = { path = "../pest", version = "2.7.12", default-features = false } +pest_meta = { path = "../meta", version = "2.7.12" } proc-macro2 = "1.0" quote = "1.0" syn = "2.0" diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index c9116749..a9475e3a 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_grammars" description = "pest popular grammar implementations" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.11" } -pest_derive = { path = "../derive", version = "2.7.11" } +pest = { path = "../pest", version = "2.7.12" } +pest_derive = { path = "../derive", version = "2.7.12" } [dev-dependencies] criterion = "0.5" diff --git a/meta/Cargo.toml b/meta/Cargo.toml index d10bddeb..45e88732 100644 --- a/meta/Cargo.toml +++ b/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_meta" description = "pest meta language parser and validator" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -22,7 +22,7 @@ include = [ rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.11" } +pest = { path = "../pest", version = "2.7.12" } once_cell = "1.8.0" [build-dependencies] diff --git a/meta/src/validator.rs b/meta/src/validator.rs index 16d38799..eff89969 100644 --- a/meta/src/validator.rs +++ b/meta/src/validator.rs @@ -226,6 +226,8 @@ pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec>) -> Vec span, @@ -235,6 +237,78 @@ pub fn validate_ast<'a, 'i: 'a>(rules: &'a Vec>) -> Vec(rules: &'a [ParserRule<'i>]) -> Vec> { + use crate::ast::RuleType; + + fn to_type_hash_map<'a, 'i: 'a>( + rules: &'a [ParserRule<'i>], + ) -> HashMap, RuleType)> { + rules + .iter() + .map(|r| (r.name.clone(), (&r.node, r.ty))) + .collect() + } + let mut result = vec![]; + + fn check_silent_builtin<'a, 'i: 'a>( + expr: &ParserExpr<'i>, + rules_ref: &HashMap, RuleType)>, + span: Span<'a>, + ) -> Option> { + match &expr { + ParserExpr::Ident(rule_name) => { + let rule = rules_ref.get(rule_name); + if matches!(rule, Some((_, RuleType::Silent))) { + return Some(Error::::new_from_span( + ErrorVariant::CustomError { + message: "tags on silent rules will not appear in the output" + .to_owned(), + }, + span, + )); + } else if BUILTINS.contains(rule_name.as_str()) { + return Some(Error::new_from_span( + ErrorVariant::CustomError { + message: "tags on built-in rules will not appear in the output" + .to_owned(), + }, + span, + )); + } + } + ParserExpr::Rep(node) + | ParserExpr::RepMinMax(node, _, _) + | ParserExpr::RepMax(node, _) + | ParserExpr::RepMin(node, _) + | ParserExpr::RepOnce(node) + | ParserExpr::RepExact(node, _) + | ParserExpr::Opt(node) + | ParserExpr::Push(node) + | ParserExpr::PosPred(node) + | ParserExpr::NegPred(node) => { + return check_silent_builtin(&node.expr, rules_ref, span); + } + _ => {} + }; + None + } + + let rules_map = to_type_hash_map(rules); + for rule in rules { + let rules_ref = &rules_map; + let mut errors = rule.node.clone().filter_map_top_down(|node1| { + if let ParserExpr::NodeTag(node2, _) = node1.expr { + check_silent_builtin(&node2.expr, rules_ref, node1.span) + } else { + None + } + }); + result.append(&mut errors); + } + result +} + /// Checks if `expr` is non-progressing, that is the expression does not /// consume any input or any stack. This includes expressions matching the empty input, /// `SOI` and ̀ `EOI`, predicates and repetitions. @@ -1796,4 +1870,47 @@ mod tests { PestParser::parse(Rule::grammar_rules, input).unwrap(), )); } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { #b = b } b = _{ ASCII_DIGIT+ } + | ^----^ + | + = tags on silent rules will not appear in the output")] + #[cfg(feature = "grammar-extras")] + fn tag_on_silent_rule() { + let input = "a = { #b = b } b = _{ ASCII_DIGIT+ }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[should_panic(expected = "grammar error + + --> 1:7 + | +1 | a = { #b = ASCII_DIGIT+ } + | ^---------------^ + | + = tags on built-in rules will not appear in the output")] + #[cfg(feature = "grammar-extras")] + fn tag_on_builtin_rule() { + let input = "a = { #b = ASCII_DIGIT+ }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } + + #[test] + #[cfg(feature = "grammar-extras")] + fn tag_on_normal_rule() { + let input = "a = { #b = b } b = { ASCII_DIGIT+ }"; + unwrap_or_report(consume_rules( + PestParser::parse(Rule::grammar_rules, input).unwrap(), + )); + } } diff --git a/pest/Cargo.toml b/pest/Cargo.toml index 0334862a..e15d3253 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest" description = "The Elegant Parser" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" diff --git a/pest/examples/parens.rs b/pest/examples/parens.rs index 34fbb2e3..172d1e36 100644 --- a/pest/examples/parens.rs +++ b/pest/examples/parens.rs @@ -48,6 +48,7 @@ impl Parser for ParenParser { } #[derive(Debug)] +#[allow(dead_code)] struct Paren(Vec); fn expr(pairs: Pairs) -> Vec { diff --git a/pest/src/error.rs b/pest/src/error.rs index 2a6c1fe5..7ad4a1c8 100644 --- a/pest/src/error.rs +++ b/pest/src/error.rs @@ -730,14 +730,13 @@ fn visualize_whitespace(input: &str) -> String { #[cfg(test)] mod tests { - use super::super::position; use super::*; use alloc::vec; #[test] fn display_parsing_error_mixed() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![1, 2, 3], @@ -763,7 +762,7 @@ mod tests { #[test] fn display_parsing_error_positives() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![1, 2], @@ -789,7 +788,7 @@ mod tests { #[test] fn display_parsing_error_negatives() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![], @@ -815,7 +814,7 @@ mod tests { #[test] fn display_parsing_error_unknown() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![], @@ -841,7 +840,7 @@ mod tests { #[test] fn display_custom_pos() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::CustomError { message: "error: big one".to_owned(), @@ -866,8 +865,8 @@ mod tests { #[test] fn display_custom_span_two_lines() { let input = "ab\ncd\nefgh"; - let start = position::Position::new(input, 4).unwrap(); - let end = position::Position::new(input, 9).unwrap(); + let start = Position::new(input, 4).unwrap(); + let end = Position::new(input, 9).unwrap(); let error: Error = Error::new_from_span( ErrorVariant::CustomError { message: "error: big one".to_owned(), @@ -893,8 +892,8 @@ mod tests { #[test] fn display_custom_span_three_lines() { let input = "ab\ncd\nefgh"; - let start = position::Position::new(input, 1).unwrap(); - let end = position::Position::new(input, 9).unwrap(); + let start = Position::new(input, 1).unwrap(); + let end = Position::new(input, 9).unwrap(); let error: Error = Error::new_from_span( ErrorVariant::CustomError { message: "error: big one".to_owned(), @@ -921,8 +920,8 @@ mod tests { #[test] fn display_custom_span_two_lines_inverted_cols() { let input = "abcdef\ngh"; - let start = position::Position::new(input, 5).unwrap(); - let end = position::Position::new(input, 8).unwrap(); + let start = Position::new(input, 5).unwrap(); + let end = Position::new(input, 8).unwrap(); let error: Error = Error::new_from_span( ErrorVariant::CustomError { message: "error: big one".to_owned(), @@ -948,8 +947,8 @@ mod tests { #[test] fn display_custom_span_end_after_newline() { let input = "abcdef\n"; - let start = position::Position::new(input, 0).unwrap(); - let end = position::Position::new(input, 7).unwrap(); + let start = Position::new(input, 0).unwrap(); + let end = Position::new(input, 7).unwrap(); assert!(start.at_start()); assert!(end.at_end()); @@ -977,8 +976,8 @@ mod tests { #[test] fn display_custom_span_empty() { let input = ""; - let start = position::Position::new(input, 0).unwrap(); - let end = position::Position::new(input, 0).unwrap(); + let start = Position::new(input, 0).unwrap(); + let end = Position::new(input, 0).unwrap(); assert!(start.at_start()); assert!(end.at_end()); @@ -1006,7 +1005,7 @@ mod tests { #[test] fn mapped_parsing_error() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![1, 2, 3], @@ -1033,7 +1032,7 @@ mod tests { #[test] fn error_with_path() { let input = "ab\ncd\nef"; - let pos = position::Position::new(input, 4).unwrap(); + let pos = Position::new(input, 4).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![1, 2, 3], @@ -1060,7 +1059,7 @@ mod tests { #[test] fn underline_with_tabs() { let input = "a\txbc"; - let pos = position::Position::new(input, 2).unwrap(); + let pos = Position::new(input, 2).unwrap(); let error: Error = Error::new_from_pos( ErrorVariant::ParsingError { positives: vec![1, 2, 3], diff --git a/pest/src/parser_state.rs b/pest/src/parser_state.rs index 276e81f5..b5361414 100644 --- a/pest/src/parser_state.rs +++ b/pest/src/parser_state.rs @@ -438,6 +438,7 @@ pub struct ParserState<'i, R: RuleType> { /// * CreateUser = { "create" ~ "user" ~ Name } /// * CreateTable = { "create" ~ "table" ~ Name } /// * Name = { SOME_DEFINITION } + /// /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd /// successfully parse "create" + "user" (and not "table"). parse_attempts: ParseAttempts, diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 22d72418..b0b22413 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_vm" description = "pest grammar virtual machine" -version = "2.7.11" +version = "2.7.12" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.11" } -pest_meta = { path = "../meta", version = "2.7.11" } +pest = { path = "../pest", version = "2.7.12" } +pest_meta = { path = "../meta", version = "2.7.12" } [features] grammar-extras = ["pest_meta/grammar-extras"]