From b9ea56a92f4d5eb1830d59ea72cda57b77061f6f Mon Sep 17 00:00:00 2001 From: tyranron Date: Sat, 6 Nov 2021 20:07:37 +0100 Subject: [PATCH 1/4] Bootstrap AST and parser --- .clippy.toml | 1 + CHANGELOG.md | 5 +- Cargo.toml | 4 + src/ast.rs | 162 ++++ src/combinator.rs | 300 ++++++ src/lib.rs | 23 + src/parse.rs | 2256 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 2750 insertions(+), 1 deletion(-) create mode 100644 src/ast.rs create mode 100644 src/combinator.rs create mode 100644 src/parse.rs diff --git a/.clippy.toml b/.clippy.toml index 5d506f6..dbede16 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -6,5 +6,6 @@ standard-macro-braces = [ { name = "assert", brace = "(" }, { name = "assert_eq", brace = "(" }, { name = "assert_ne", brace = "(" }, + { name = "matches", brace = "(" }, { name = "vec", brace = "[" }, ] diff --git a/CHANGELOG.md b/CHANGELOG.md index a93084c..b6a7f4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,12 @@ All user visible changes to `cucumber-expressions` crate will be documented in t ### Added -- ??? +- [Cucumber Expressions] AST and parser. ([#1]) +[#1]: /../../pull/1 + +[Cucumber Expressions]: https://github.com/cucumber/cucumber-expressions#readme [Semantic Versioning 2.0.0]: https://semver.org diff --git a/Cargo.toml b/Cargo.toml index 6b1051c..7b0df8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ name = "cucumber-expressions" version = "0.1.0-dev" edition = "2021" rust-version = "1.56" +description = "Cucumber Expressions AST and parser." 
license = "MIT OR Apache-2.0" authors = [ "Ilya Solovyiov ", @@ -17,3 +18,6 @@ keywords = ["cucumber", "expression", "expressions", "cucumber-expressions"] include = ["/src/", "/LICENSE-*", "/README.md", "/CHANGELOG.md"] [dependencies] +derive_more = { version = "0.99.16", features = ["as_ref", "deref", "deref_mut", "display", "error"], default_features = false } +nom = "7.0" +nom_locate = "4.0" diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..522b4ec --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,162 @@ +// Copyright (c) 2021 Brendan Molloy , +// Ilya Solovyiov , +// Kai Ren +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! [Cucumber Expressions][1] [AST][2] definitions. +//! +//! See details in the [grammar spec][3]. +//! +//! [1]: https://github.com/cucumber/cucumber-expressions#readme +//! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree +//! [3]: https://tinyurl.com/cucumber-expr-spec#grammar + +use derive_more::{AsRef, Deref, DerefMut}; +use nom::{error::ErrorKind, Err, InputLength}; +use nom_locate::LocatedSpan; + +use crate::parse; + +/// [`str`] along with its location information in the original string. +pub type Spanned<'s> = LocatedSpan<&'s str>; + +/// Top-level [`cucumber-expression`][3]. +/// +/// See [`parse::expression()`] for the detailed grammar and examples. 
+/// +/// [3]: https://tinyurl.com/cucumber-expr-spec#grammar +#[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] +pub struct Expression(pub Vec>); + +impl<'s> TryFrom<&'s str> for Expression> { + type Error = parse::Error>; + + fn try_from(value: &'s str) -> Result { + parse::expression(Spanned::new(value)) + .map_err(|e| match e { + Err::Error(e) | Err::Failure(e) => e, + Err::Incomplete(n) => parse::Error::Needed(n), + }) + .and_then(|(rest, parsed)| { + rest.is_empty() + .then(|| parsed) + .ok_or(parse::Error::Other(rest, ErrorKind::Verify)) + }) + } +} + +impl<'s> Expression> { + /// Parses the given `input` as an [`Expression`]. + /// + /// # Errors + /// + /// See [`parse::Error`] for details. + pub fn parse>( + input: &'s I, + ) -> Result>> { + Self::try_from(input.as_ref()) + } +} + +/// Single entry of a [`cucumber-expression`][3]. +/// +/// See [`parse::single_expression()`] for the detailed grammar and examples. +/// +/// [3]: https://tinyurl.com/cucumber-expr-spec#grammar +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum SingleExpression { + /// [`alternation`][3] expression. + /// + /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + Alternation(Alternation), + + /// [`optional`][3] expression. + /// + /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + Optional(Optional), + + /// [`parameter`][3] expression. + /// + /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + Parameter(Parameter), + + /// Text without whitespaces. + Text(Input), + + /// Whitespaces are treated as a special case to avoid lookaheads and + /// lookbehinds described in the [architecture][1]. This allows parsing to + /// have `O(n)` complexity. + /// + /// [1]: https://tinyurl.com/cucumber-expr-spec + Whitespace, +} + +/// Allows to match one of [`SingleAlternation`]s. +/// +/// See [`parse::alternation()`] for detailed syntax and examples. 
+#[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] +pub struct Alternation(pub Vec>); + +/// Building block of an [`Alternation`]. +pub type SingleAlternation = Vec>; + +impl Alternation { + /// Returns length of capture from `Input`. + pub(crate) fn span_len(&self) -> usize { + self.0 + .iter() + .flatten() + .map(|alt| match alt { + Alternative::Text(t) => t.input_len(), + Alternative::Optional(opt) => opt.input_len() + 2, + }) + .sum::() + + self.len() + - 1 + } + + /// Indicates whether one of [`SingleAlternation`]s consists only of + /// [`Optional`]s. + pub(crate) fn contains_only_optional(&self) -> bool { + for single_alt in &**self { + if single_alt + .iter() + .all(|alt| matches!(alt, Alternative::Optional(_))) + { + return true; + } + } + false + } +} + +/// [`alternative`][3] expression. +/// +/// See [`parse::alternative()`] for the detailed grammar and examples. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Alternative { + /// [`optional`][3] expression. + /// + /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + Optional(Optional), + + /// Text. + Text(Input), +} + +/// Allows to match optional `Input`. +/// +/// See [`parse::optional()`] for detailed syntax and examples. +#[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)] +pub struct Optional(pub Input); + +/// Allows to match some special `Input` described by a [`Parameter`] name. +/// +/// See [`parse::parameter()`] for detailed syntax and examples. +#[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)] +pub struct Parameter(pub Input); diff --git a/src/combinator.rs b/src/combinator.rs new file mode 100644 index 0000000..4db258c --- /dev/null +++ b/src/combinator.rs @@ -0,0 +1,300 @@ +// Copyright (c) 2021 Brendan Molloy , +// Ilya Solovyiov , +// Kai Ren +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option.
This file may not be copied, modified, or distributed +// except according to those terms. + +//! Helper parser combinators. + +use std::ops::RangeFrom; + +use nom::{ + error::{ErrorKind, ParseError}, + AsChar, Err, IResult, InputIter, InputLength, InputTake, + InputTakeAtPosition, Offset, Parser, Slice, +}; + +/// Applies `map` to `parser`'s [`IResult`] in case it errored. +/// +/// Can be used to harden [`Error`] to [`Failure`]. +/// +/// [`Error`]: nom::Err::Error +/// [`Failure`]: nom::Err::Failure +/// [`verify()`]: nom::combinator::verify() +pub(crate) fn map_err, F, G>( + mut parser: F, + map: G, +) -> impl FnMut(I) -> IResult +where + F: Parser, + G: Fn(Err) -> Err, +{ + move |input: I| parser.parse(input).map_err(&map) +} + +/// Differences from [`escaped()`]: +/// 1. If `normal` matched empty sequence, tries to match escaped; +/// 2. If `normal` matched empty sequence and then `escapable` didn't match +/// anything, returns empty sequence; +/// 3. Errors with [`ErrorKind::Escaped`] if `control_char` was followed by a +/// non-`escapable` `Input`. 
+/// +/// [`escaped()`]: nom::bytes::complete::escaped() +pub(crate) fn escaped0<'a, Input: 'a, Error, F, G, O1, O2>( + mut normal: F, + control_char: char, + mut escapable: G, +) -> impl FnMut(Input) -> IResult +where + Input: Clone + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter, + ::Item: AsChar, + F: Parser, + G: Parser, + Error: ParseError, +{ + move |input: Input| { + let mut i = input.clone(); + let mut consumed_nothing = false; + + while i.input_len() > 0 { + let current_len = i.input_len(); + + match (normal.parse(i.clone()), consumed_nothing) { + (Ok((i2, _)), false) => { + if i2.input_len() == 0 { + return Ok((input.slice(input.input_len()..), input)); + } + if i2.input_len() == current_len { + consumed_nothing = true; + } + i = i2; + } + (Ok(..), true) | (Err(Err::Error(_)), _) => { + let next_char = i + .iter_elements() + .next() + .ok_or_else(|| { + Err::Error(Error::from_error_kind( + i.clone(), + ErrorKind::Escaped, + )) + })? + .as_char(); + if next_char == control_char { + let next = control_char.len_utf8(); + if next >= i.input_len() { + return Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::Escaped, + ))); + } + match escapable.parse(i.slice(next..)) { + Ok((i2, _)) => { + if i2.input_len() == 0 { + return Ok(( + input.slice(input.input_len()..), + input, + )); + } + consumed_nothing = false; + i = i2; + } + Err(_) => { + return Err(Err::Error( + Error::from_error_kind( + i, + ErrorKind::Escaped, + ), + )); + } + } + } else { + let index = input.offset(&i); + return Ok(input.take_split(index)); + } + } + (Err(e), _) => { + return Err(e); + } + } + } + + Ok((input.slice(input.input_len()..), input)) + } +} + +#[cfg(test)] +mod escaped0_spec { + use nom::{ + bytes::complete::escaped, + character::complete::{digit0, digit1, one_of}, + error::{Error, ErrorKind}, + Err, IResult, + }; + + use super::escaped0; + + /// Type used to compare behaviour of [`escaped`] and [`escaped0`]. 
+ /// + /// Tuple is constructed from following parsers results: + /// - [`escaped0`]`(`[`digit0`]`, '\\', `[`one_of`]`(r#""n\"#))` + /// - [`escaped0`]`(`[`digit1`]`, '\\', `[`one_of`]`(r#""n\"#))` + /// - [`escaped`]`(`[`digit0`]`, '\\', `[`one_of`]`(r#""n\"#))` + /// - [`escaped`]`(`[`digit1`]`, '\\', `[`one_of`]`(r#""n\"#))` + type TestResult<'s> = ( + IResult<&'s str, &'s str>, + IResult<&'s str, &'s str>, + IResult<&'s str, &'s str>, + IResult<&'s str, &'s str>, + ); + + /// Produces [`TestResult`] from `input`. + fn get_result(input: &str) -> TestResult<'_> { + ( + escaped0(digit0, '\\', one_of(r#""n\"#))(input), + escaped0(digit1, '\\', one_of(r#""n\"#))(input), + escaped(digit0, '\\', one_of(r#""n\"#))(input), + escaped(digit1, '\\', one_of(r#""n\"#))(input), + ) + } + + #[test] + fn matches_empty() { + assert_eq!( + get_result(""), + (Ok(("", "")), Ok(("", "")), Ok(("", "")), Ok(("", ""))), + ); + } + + #[test] + fn matches_normal() { + assert_eq!( + get_result("123;"), + ( + Ok((";", "123")), + Ok((";", "123")), + Ok((";", "123")), + Ok((";", "123")) + ), + ); + } + + #[test] + fn matches_only_escaped() { + assert_eq!( + get_result(r#"\n\";"#), + ( + Ok((";", r#"\n\""#)), + Ok((";", r#"\n\""#)), + Ok((r#"\n\";"#, "")), + Ok((";", r#"\n\""#)), + ), + ); + } + + #[test] + fn matches_escaped_followed_by_normal() { + assert_eq!( + get_result(r#"\n\"123;"#), + ( + Ok((";", r#"\n\"123"#)), + Ok((";", r#"\n\"123"#)), + Ok((r#"\n\"123;"#, "")), + Ok((";", r#"\n\"123"#)), + ), + ); + } + + #[test] + fn matches_normal_followed_by_escaped() { + assert_eq!( + get_result(r#"123\n\";"#), + ( + Ok((";", r#"123\n\""#)), + Ok((";", r#"123\n\""#)), + Ok((r#"\n\";"#, "123")), + Ok((";", r#"123\n\""#)), + ), + ); + } + + #[test] + fn matches_escaped_followed_by_normal_then_escaped() { + assert_eq!( + get_result(r#"\n\"123\n;"#), + ( + Ok((";", r#"\n\"123\n"#)), + Ok((";", r#"\n\"123\n"#)), + Ok((r#"\n\"123\n;"#, "")), + Ok((";", r#"\n\"123\n"#)), + ), + ); + } + + #[test] + 
fn matches_normal_followed_by_escaped_then_normal() { + assert_eq!( + get_result(r#"123\n\"567;"#), + ( + Ok((";", r#"123\n\"567"#)), + Ok((";", r#"123\n\"567"#)), + Ok((r#"\n\"567;"#, "123")), + Ok((";", r#"123\n\"567"#)), + ), + ); + } + + #[test] + fn errors_on_escaped_non_reserved() { + assert_eq!( + get_result(r#"\n\r"#), + ( + Err(Err::Error(Error { + input: r#"\r"#, + code: ErrorKind::Escaped + })), + Err(Err::Error(Error { + input: r#"\r"#, + code: ErrorKind::Escaped + })), + Ok((r#"\n\r"#, "")), + Err(Err::Error(Error { + input: r#"r"#, + code: ErrorKind::OneOf + })), + ), + ); + } + + #[test] + fn errors_on_control_char() { + assert_eq!( + get_result("\\"), + ( + Err(Err::Error(Error { + input: "\\", + code: ErrorKind::Escaped + })), + Err(Err::Error(Error { + input: "\\", + code: ErrorKind::Escaped + })), + Ok(("\\", "")), + Err(Err::Error(Error { + input: "\\", + code: ErrorKind::Escaped + })) + ), + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 0b3578e..eb6acba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,13 @@ +// Copyright (c) 2021 Brendan Molloy , +// Ilya Solovyiov , +// Kai Ren +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ #![doc( html_logo_url = "https://avatars.githubusercontent.com/u/91469139?s=128", html_favicon_url = "https://avatars.githubusercontent.com/u/91469139?s=256" @@ -85,3 +95,16 @@ unused_results, variant_size_differences )] + +pub mod ast; +mod combinator; +pub mod parse; + +#[doc(inline)] +pub use self::{ + ast::{ + Alternation, Alternative, Expression, Optional, Parameter, + SingleAlternation, SingleExpression, Spanned, + }, + parse::Error, +}; diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..b918699 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,2256 @@ +// Copyright (c) 2021 Brendan Molloy , +// Ilya Solovyiov , +// Kai Ren +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! [Cucumber Expressions][1] [AST][2] parser. +//! +//! See details in the [grammar spec][3]. +//! +//! [1]: https://github.com/cucumber/cucumber-expressions#readme +//! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree +//! [3]: https://tinyurl.com/cucumber-expr-spec#grammar + +use std::{fmt::Display, ops::RangeFrom}; + +use derive_more::{Display, Error}; +use nom::{ + branch::alt, + bytes::complete::{tag, take_while}, + character::complete::one_of, + combinator::{map, peek, verify}, + error::{ErrorKind, ParseError}, + multi::{many0, many1, separated_list1}, + sequence::tuple, + AsChar, Compare, Err, FindToken, IResult, InputIter, InputLength, + InputTake, InputTakeAtPosition, Needed, Offset, Parser, Slice, +}; + +use crate::{ + ast::{ + Alternation, Alternative, Expression, Optional, Parameter, + SingleExpression, + }, + combinator, +}; + +/// Reserved characters requiring special handling. +pub const RESERVED_CHARS: &str = r#"{}()\/ "#; + +/// Matches `normal` and [`RESERVED_CHARS`] escaped with `\`. +/// +/// Uses [`combinator::escaped0`] under the hood. 
+/// +/// # Errors +/// +/// ## Recoverable [`Error`] +/// +/// - If `normal` parser errors. +/// +/// ## Irrecoverable [`Failure`] +/// +/// - If `normal` parser fails +/// - [`EscapedNonReservedCharacter`] +/// +/// [`Error`]: Err::Error +/// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter +/// [`Failure`]: Err::Failure +fn escaped_reserved_chars0<'a, Input: 'a, F, O1>( + normal: F, +) -> impl FnMut(Input) -> IResult> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter, + ::Item: AsChar + Copy, + F: Parser>, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + combinator::map_err( + combinator::escaped0(normal, '\\', one_of(RESERVED_CHARS)), + |e| { + if let Err::Error(Error::Other(span, ErrorKind::Escaped)) = e { + let span = (span.input_len() > 0) + .then(|| span.take(1)) + .unwrap_or(span); + Error::EscapedNonReservedCharacter(span).failure() + } else { + e + } + }, + ) +} + +/// # Syntax +/// +/// ```text +/// parameter := '{' (name | '\' name_to_escape)* '}' +/// name := ^name_to_escape +/// name_to_escape := '{' | '}' | '(' | '/' | '\' +/// ``` +/// +/// # Example +/// +/// ```text +/// {} +/// {name} +/// {with spaces} +/// {escaped \/\{\(} +/// {no need to escape )} +/// {🦀} +/// ``` +/// +/// # Errors +/// +/// ## Recoverable [`Error`]s +/// +/// - If `input` doesn't start with `{` +/// +/// ## Irrecoverable [`Failure`]s +/// +/// - [`EscapedNonReservedCharacter`] +/// - [`NestedParameter`] +/// - [`OptionalInParameter`] +/// - [`UnescapedReservedCharacter`] +/// - [`UnfinishedParameter`] +/// +/// [`Error`]: Err::Error +/// [`Failure`]: Err::Failure +/// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter +/// [`NestedParameter`]: Error::NestedParameter +/// [`OptionalInParameter`]: Error::OptionalInParameter +/// [`UnescapedReservedCharacter`]: Error::UnescapedReservedCharacter +/// [`UnfinishedParameter`]: Error::UnfinishedParameter 
+pub fn parameter<'a, Input: 'a>( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + let is_name = |c| !"{}(\\/".contains(c); + + let fail = |input: Input, opening_brace| { + match input.iter_elements().next().map(AsChar::as_char) { + Some('{') => { + if let Ok((_, (par, ..))) = peek(tuple(( + parameter, + escaped_reserved_chars0(take_while(is_name)), + tag("}"), + )))(input.clone()) + { + return Error::NestedParameter( + input.take(par.0.input_len() + 2), + ) + .failure(); + } + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + Some('(') => { + if let Ok((_, opt)) = peek(optional)(input.clone()) { + return Error::OptionalInParameter( + input.take(opt.0.input_len() + 2), + ) + .failure(); + } + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + Some(c) if RESERVED_CHARS.contains(c) => { + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + _ => {} + } + Error::UnfinishedParameter(opening_brace).failure() + }; + + let (input, opening_brace) = tag("{")(input)?; + let (input, par_name) = + escaped_reserved_chars0(take_while(is_name))(input)?; + let (input, _) = combinator::map_err(tag("}"), |_| { + fail(input.clone(), opening_brace.clone()) + })(input.clone())?; + + Ok((input, Parameter(par_name))) +} + +/// # Syntax +/// +/// ```text +/// optional := '(' (text_in_optional | '\' optional_to_escape)+ ')' +/// text_in_optional := ^optional_to_escape +/// optional_to_escape := '(' | ')' | '{' | '/' | '\' +/// ``` +/// +/// # Example +/// +/// ```text +/// (name) +/// (with spaces) +/// (escaped \/\{\() +/// (no need to escape }) +/// (🦀) +/// ``` +/// +/// # Errors +/// +/// ## Recoverable [`Error`]s +/// +/// - If `input` doesn't start with `(` +/// +/// ## Irrecoverable 
[`Failure`]s +/// +/// - [`AlternationInOptional`] +/// - [`EmptyOptional`] +/// - [`EscapedNonReservedCharacter`] +/// - [`NestedOptional`] +/// - [`ParameterInOptional`] +/// - [`UnescapedReservedCharacter`] +/// - [`UnfinishedOptional`] +/// +/// [`Error`]: Err::Error +/// [`Failure`]: Err::Failure +/// [`AlternationInOptional`]: Error::AlternationInOptional +/// [`EmptyOptional`]: Error::EmptyOptional +/// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter +/// [`NestedOptional`]: Error::NestedOptional +/// [`ParameterInOptional`]: Error::ParameterInOptional +/// [`UnescapedReservedCharacter`]: Error::UnescapedReservedCharacter +/// [`UnfinishedOptional`]: Error::UnfinishedOptional +pub fn optional<'a, Input: 'a>( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + let is_text_in_optional = |c| !"(){\\/".contains(c); + + let fail = |input: Input, opening_brace| { + match input.iter_elements().next().map(AsChar::as_char) { + Some('(') => { + if let Ok((_, (opt, ..))) = peek(tuple(( + optional, + escaped_reserved_chars0(take_while(is_text_in_optional)), + tag(")"), + )))(input.clone()) + { + return Error::NestedOptional( + input.take(opt.0.input_len() + 2), + ) + .failure(); + } + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + Some('{') => { + if let Ok((_, par)) = peek(parameter)(input.clone()) { + return Error::ParameterInOptional( + input.take(par.0.input_len() + 2), + ) + .failure(); + } + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + Some('/') => { + return Error::AlternationInOptional(input.take(1)).failure(); + } + Some(c) if RESERVED_CHARS.contains(c) => { + return Error::UnescapedReservedCharacter(input.take(1)) + .failure(); + } + _ => {} + } + 
Error::UnfinishedOptional(opening_brace).failure() + }; + + let original_input = input.clone(); + let (input, opening_paren) = tag("(")(input)?; + let (input, opt) = + escaped_reserved_chars0(take_while(is_text_in_optional))(input)?; + let (input, _) = combinator::map_err(tag(")"), |_| { + fail(input.clone(), opening_paren.clone()) + })(input.clone())?; + + if opt.input_len() == 0 { + return Err(Err::Failure(Error::EmptyOptional(original_input.take(2)))); + } + + Ok((input, Optional(opt))) +} + +/// # Syntax +/// +/// ```text +/// alternative := optional +/// | (text_without_whitespace +/// | '\' whitespace_and_special)+ +/// text_without_whitespace := ^whitespace_and_special +/// whitespace_and_special := ' ' | '(' | '{' | '/' | '\' +/// ``` +/// +/// # Example +/// +/// ```text +/// text +/// escaped\ whitespace +/// no-need-to-escape)} +/// 🦀 +/// (optional) +/// ``` +/// +/// # Errors +/// +/// ## Irrecoverable [`Failure`]s +/// +/// Any [`Failure`] of [`optional()`]. +/// +/// [`Failure`]: Err::Failure +pub fn alternative<'a, Input: 'a>( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + let is_text_without_whitespace = |c| !" 
({\\/".contains(c); + + alt(( + map(optional, Alternative::Optional), + map( + verify( + escaped_reserved_chars0(take_while(is_text_without_whitespace)), + |p| p.input_len() > 0, + ), + Alternative::Text, + ), + ))(input) +} + +/// # Grammar +/// +/// ```text +/// alternation := single_alternation (`/` single_alternation)+ +/// single_alternation := ((text_without_whitespace+ optional*) +/// | (optional+ text_without_whitespace+))+ +/// ``` +/// +/// # Example +/// +/// ```text +/// left/right +/// left(opt)/(opt)right +/// escaped\ /text +/// no-need-to-escape)}/text +/// 🦀/⚙️ +/// ``` +/// +/// # Errors +/// +/// ## Recoverable [`Error`]s +/// +/// - If `input` doesn't have `/` +/// +/// ## Irrecoverable [`Failure`]s +/// +/// - Any [`Failure`] of [`optional()`] +/// - [`EmptyAlternation`] +/// - [`OnlyOptionalInAlternation`] +/// +/// [`Error`]: Err::Error +/// [`Failure`]: Err::Failure +/// [`EmptyAlternation`]: Error::EmptyAlternation +/// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +pub fn alternation( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + let original_input = input.clone(); + let (rest, alt) = match separated_list1(tag("/"), many1(alternative))(input) + { + Ok((rest, alt)) => { + if let Ok((_, slash)) = + peek::<_, _, Error, _>(tag("/"))(rest.clone()) + { + Err(Error::EmptyAlternation(slash).failure()) + } else if alt.len() == 1 { + Err(Err::Error(Error::Other(rest, ErrorKind::Tag))) + } else { + Ok((rest, Alternation(alt))) + } + } + Err(Err::Error(Error::Other(sp, ErrorKind::Many1))) + if peek::<_, _, Error, _>(tag("/"))(sp.clone()).is_ok() => + { + Err(Error::EmptyAlternation(sp.take(1)).failure()) + } + Err(e) => Err(e), + }?; + + alt.contains_only_optional() + .then(|| { + 
Err(Error::OnlyOptionalInAlternation( + original_input.take(alt.span_len()), + ) + .failure()) + }) + .unwrap_or(Ok((rest, alt))) +} + +/// # Syntax +/// +/// ```text +/// single_expression := alternation +/// | optional +/// | parameter +/// | text_without_whitespace+ +/// | whitespace +/// ``` +/// +/// # Example +/// +/// ```text +/// text(opt)/text +/// (opt) +/// {string} +/// text +/// ``` +/// +/// # Errors +/// +/// ## Irrecoverable [`Failure`]s +/// +/// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. +/// +/// [`Error`]: Err::Error +/// [`Failure`]: Err::Failure +/// [`EmptyAlternation`]: Error::EmptyAlternation +/// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +pub fn single_expression<'a, Input: 'a>( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + let is_text_without_whitespace = |c| !" ({\\/".contains(c); + + alt(( + map(alternation, SingleExpression::Alternation), + map(optional, SingleExpression::Optional), + map(parameter, SingleExpression::Parameter), + map( + verify( + escaped_reserved_chars0(take_while(is_text_without_whitespace)), + |s| s.input_len() > 0, + ), + SingleExpression::Text, + ), + map(tag(" "), |_| SingleExpression::Whitespace), + ))(input) +} + +/// # Syntax +/// +/// ```text +/// expression := single_expression* +/// ``` +/// +/// # Example +/// +/// ```text +/// text(opt)/text +/// (opt) +/// {string} +/// text +/// ``` +/// +/// Note: empty string is matched too. +/// +/// # Errors +/// +/// ## Irrecoverable [`Failure`]s +/// +/// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. 
+/// +/// [`Error`]: Err::Error +/// [`Failure`]: Err::Failure +/// [`EmptyAlternation`]: Error::EmptyAlternation +/// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +pub fn expression<'a, Input: 'a>( + input: Input, +) -> IResult, Error> +where + Input: Clone + + Display + + Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice> + + InputIter + + for<'s> Compare<&'s str>, + ::Item: AsChar + Copy, + Error: ParseError, + for<'s> &'s str: FindToken<::Item>, +{ + map(many0(single_expression), Expression)(input) +} + +/// Possible parsing errors. +#[derive(Debug, Display, Error, Eq, PartialEq)] +pub enum Error +where + Input: Display, +{ + /// Nested [`Parameter`]s. + #[display( + fmt = "\ + {}\n\ + A parameter may not contain an other parameter.\n\ + If you did not mean to use an optional type you can use '\\{{' to \ + escape the '{{'. For more complicated expressions consider using a \ + regular expression instead.", + _0 + )] + NestedParameter(#[error(not(source))] Input), + + /// [`Optional`] inside [`Parameter`]. + #[display( + fmt = "\ + {}\n\ + A parameter may not contain an optional type.\n\ + If you did not mean to use an parameter type you can use '\\(' to \ + escape the '('.", + _0 + )] + OptionalInParameter(#[error(not(source))] Input), + + /// Unfinished [`Parameter`]. + #[display( + fmt = "\ + {}\n\ + The '{{' does not have a matching '}}'.\n\ + If you did not intend to use a parameter you can use '\\{{' to escape \ + the '{{'.", + _0 + )] + UnfinishedParameter(#[error(not(source))] Input), + + /// Nested [`Optional`]. + #[display( + fmt = "\ + {}\n\ + An optional may not contain an other optional.\n\ + If you did not mean to use an optional type you can use '\\(' to \ + escape the '('. For more complicated expressions consider using a \ + regular expression instead.", + _0 + )] + NestedOptional(#[error(not(source))] Input), + + /// [`Parameter`] inside [`Optional`]. 
+ #[display( + fmt = "\ + {}\n\ + An optional may not contain a parameter type.\n\ + If you did not mean to use an parameter type you can use '\\{{' to \ + escape the '{{'.", + _0 + )] + ParameterInOptional(#[error(not(source))] Input), + + /// Empty [`Optional`]. + #[display( + fmt = "\ + {}\n\ + An optional must contain some text.\n\ + If you did not mean to use an optional you can use '\\(' to escape the \ + '('.", + _0 + )] + EmptyOptional(#[error(not(source))] Input), + + /// [`Alternation`] inside [`Optional`]. + #[display( + fmt = "\ + {}\n\ + An alternation can not be used inside an optional.\n\ + You can use '\\/' to escape the '/'.", + _0 + )] + AlternationInOptional(#[error(not(source))] Input), + + /// Unfinished [`Optional`]. + #[display( + fmt = "\ + {}\n\ + The '(' does not have a matching ')'.\n\ + If you did not intend to use an optional you can use '\\(' to escape \ + the '('.", + _0 + )] + UnfinishedOptional(#[error(not(source))] Input), + + /// Empty [`Alternation`]. + #[display( + fmt = "\ + {}\n\ + Alternative may not be empty.\n\ + If you did not mean to use an alternative you can use '\\/' to escape \ + the '/'.", + _0 + )] + EmptyAlternation(#[error(not(source))] Input), + + /// Only [`Optional`] inside [`Alternation`]. + #[display( + fmt = "\ + {}\n\ + An alternative may not exclusively contain optionals.\n\ + If you did not mean to use an optional you can use '\\(' to escape the \ + '('.", + _0 + )] + OnlyOptionalInAlternation(#[error(not(source))] Input), + + /// Unescaped [`RESERVED_CHARS`]. + #[display( + fmt = "\ + {}\n\ + Unescaped reserved character.\n\ + You can use an '\\' to escape it.", + _0 + )] + UnescapedReservedCharacter(#[error(not(source))] Input), + + /// Escaped non-[`RESERVED_CHARS`]. 
+ #[display( + fmt = "\ + {}\n\ + Only the characters '{{', '}}', '(', ')', '\\', '/' and whitespace can \ + be escaped.\n\ + If you did mean to use an '\\' you can use '\\\\' to escape it.", + _0 + )] + EscapedNonReservedCharacter(#[error(not(source))] Input), + + /// Unknown error. + #[display( + fmt = "\ + {}\n\ + Unknown parsing error.", + _0 + )] + Other(#[error(not(source))] Input, ErrorKind), + + /// Parsing requires more data. + #[display( + fmt = "{}", + "match _0 {\ + Needed::Size(n) => format!(\"Parsing requires {} bytes/chars\", n),\ + Needed::Unknown => \"Parsing requires more data\".to_owned(),\ + }" + )] + Needed(#[error(not(source))] Needed), +} + +impl Error { + /// Converts this [`Error`] into [`Failure`]. + /// + /// [`Error`]: enum@Error + /// [`Failure`]: Err::Failure + fn failure(self) -> Err { + Err::Failure(self) + } +} + +impl ParseError for Error { + fn from_error_kind(input: Input, kind: ErrorKind) -> Self { + Self::Other(input, kind) + } + + fn append(input: Input, kind: ErrorKind, other: Self) -> Self { + if let Self::Other(..) 
= other { + Self::from_error_kind(input, kind) + } else { + other + } + } +} + +#[cfg(test)] +mod spec { + use nom::{error::ErrorKind, Err, IResult}; + + use crate::{ + parse::{alternation, alternative, expression, optional, parameter}, + Alternative, Error, Spanned, + }; + + fn eq(left: impl AsRef, right: impl AsRef) { + assert_eq!( + left.as_ref() + .replace(' ', "") + .replace('\n', "") + .replace('\t', ""), + right + .as_ref() + .replace(' ', "") + .replace('\n', "") + .replace('\t', ""), + ); + } + + fn unwrap_parser<'s, T>( + par: IResult, T, Error>>, + ) -> T { + let (rest, par) = + par.unwrap_or_else(|e| panic!("Expected Ok, found Err: {}", e)); + assert_eq!(*rest, ""); + par + } + + mod parameter { + use super::{parameter, unwrap_parser, Err, Error, ErrorKind, Spanned}; + + #[test] + fn empty() { + assert_eq!(**unwrap_parser(parameter(Spanned::new("{}"))), ""); + } + + #[test] + fn named() { + assert_eq!( + **unwrap_parser(parameter(Spanned::new("{string}"))), + "string", + ); + } + + #[test] + fn named_with_spaces() { + assert_eq!( + **unwrap_parser(parameter(Spanned::new("{with space}"))), + "with space", + ); + } + + #[test] + fn named_with_escaped() { + assert_eq!( + **unwrap_parser(parameter(Spanned::new("{with \\{}"))), + "with \\{", + ); + } + + #[test] + fn named_with_closing_paren() { + assert_eq!( + **unwrap_parser(parameter(Spanned::new("{with )}"))), + "with )", + ); + } + + #[allow(clippy::non_ascii_literal)] + #[test] + fn named_with_emoji() { + assert_eq!(**unwrap_parser(parameter(Spanned::new("{🦀}"))), "🦀",); + } + + #[test] + fn errors_on_empty() { + let span = Spanned::new(""); + assert_eq!( + parameter(span), + Err(Err::Error(Error::Other(span, ErrorKind::Tag))), + ); + } + + #[test] + fn fails_on_escaped_non_reserved() { + let err = parameter(Spanned::new("{\\r}")).unwrap_err(); + + match err { + Err::Failure(Error::EscapedNonReservedCharacter(e)) => { + assert_eq!(*e, "\\"); + } + Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => 
{ + panic!("wrong error: {:?}", err) + } + } + } + + #[test] + fn fails_on_nested() { + let err = [ + parameter(Spanned::new("{{nest}}")).expect_err("error"), + parameter(Spanned::new("{before{nest}}")).expect_err("error"), + parameter(Spanned::new("{{nest}after}")).expect_err("error"), + parameter(Spanned::new("{bef{nest}aft}")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::NestedParameter(e1)), + Err::Failure(Error::NestedParameter(e2)), + Err::Failure(Error::NestedParameter(e3)), + Err::Failure(Error::NestedParameter(e4)), + ] => { + assert_eq!(*e1, "{nest}"); + assert_eq!(*e2, "{nest}"); + assert_eq!(*e3, "{nest}"); + assert_eq!(*e4, "{nest}"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_optional() { + let err = [ + parameter(Spanned::new("{(nest)}")).expect_err("error"), + parameter(Spanned::new("{before(nest)}")).expect_err("error"), + parameter(Spanned::new("{(nest)after}")).expect_err("error"), + parameter(Spanned::new("{bef(nest)aft}")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::OptionalInParameter(e1)), + Err::Failure(Error::OptionalInParameter(e2)), + Err::Failure(Error::OptionalInParameter(e3)), + Err::Failure(Error::OptionalInParameter(e4)), + ] => { + assert_eq!(*e1, "(nest)"); + assert_eq!(*e2, "(nest)"); + assert_eq!(*e3, "(nest)"); + assert_eq!(*e4, "(nest)"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_unescaped_reserved_char() { + let err = [ + parameter(Spanned::new("{(opt}")).expect_err("error"), + parameter(Spanned::new("{(n(e)st)}")).expect_err("error"), + parameter(Spanned::new("{{nest}")).expect_err("error"), + parameter(Spanned::new("{l/r}")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::UnescapedReservedCharacter(e1)), + Err::Failure(Error::UnescapedReservedCharacter(e2)), + Err::Failure(Error::UnescapedReservedCharacter(e3)), + 
Err::Failure(Error::UnescapedReservedCharacter(e4)), + ] => { + assert_eq!(*e1, "("); + assert_eq!(*e2, "("); + assert_eq!(*e3, "{"); + assert_eq!(*e4, "/"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_unfinished() { + let err = [ + parameter(Spanned::new("{")).expect_err("error"), + parameter(Spanned::new("{name ")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::UnfinishedParameter(e1)), + Err::Failure(Error::UnfinishedParameter(e2)) + ] => { + assert_eq!(*e1, "{"); + assert_eq!(*e2, "{"); + } + _ => panic!("wrong error: {:?}", err), + } + } + } + + mod optional { + use super::{optional, unwrap_parser, Err, Error, ErrorKind, Spanned}; + + #[test] + fn basic() { + assert_eq!( + **unwrap_parser(optional(Spanned::new("(string)"))), + "string", + ); + } + + #[test] + fn with_spaces() { + assert_eq!( + **unwrap_parser(optional(Spanned::new("(with space)"))), + "with space", + ); + } + + #[test] + fn with_escaped() { + assert_eq!( + **unwrap_parser(optional(Spanned::new("(with \\{)"))), + "with \\{", + ); + } + + #[test] + fn with_closing_brace() { + assert_eq!( + **unwrap_parser(optional(Spanned::new("(with })"))), + "with }", + ); + } + + #[allow(clippy::non_ascii_literal)] + #[test] + fn with_emoji() { + assert_eq!(**unwrap_parser(optional(Spanned::new("(🦀)"))), "🦀"); + } + + #[test] + fn errors_on_empty() { + let span = Spanned::new(""); + + assert_eq!( + optional(span), + Err(Err::Error(Error::Other(span, ErrorKind::Tag))), + ); + } + + #[test] + fn fails_on_empty() { + let err = optional(Spanned::new("()")).unwrap_err(); + + match err { + Err::Failure(Error::EmptyOptional(e)) => { + assert_eq!(*e, "()"); + } + Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => { + panic!("wrong error: {:?}", err) + } + } + } + + #[test] + fn fails_on_escaped_non_reserved() { + let err = optional(Spanned::new("(\\r)")).unwrap_err(); + + match err { + Err::Failure(Error::EscapedNonReservedCharacter(e)) 
=> { + assert_eq!(*e, "\\"); + } + Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => { + panic!("wrong error: {:?}", err) + } + } + } + + #[test] + fn fails_on_nested() { + let err = [ + optional(Spanned::new("((nest))")).expect_err("error"), + optional(Spanned::new("(before(nest))")).expect_err("error"), + optional(Spanned::new("((nest)after)")).expect_err("error"), + optional(Spanned::new("(bef(nest)aft)")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::NestedOptional(e1)), + Err::Failure(Error::NestedOptional(e2)), + Err::Failure(Error::NestedOptional(e3)), + Err::Failure(Error::NestedOptional(e4)), + ] => { + assert_eq!(*e1, "(nest)"); + assert_eq!(*e2, "(nest)"); + assert_eq!(*e3, "(nest)"); + assert_eq!(*e4, "(nest)"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_parameter() { + let err = [ + optional(Spanned::new("({nest})")).expect_err("error"), + optional(Spanned::new("(before{nest})")).expect_err("error"), + optional(Spanned::new("({nest}after)")).expect_err("error"), + optional(Spanned::new("(bef{nest}aft)")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::ParameterInOptional(e1)), + Err::Failure(Error::ParameterInOptional(e2)), + Err::Failure(Error::ParameterInOptional(e3)), + Err::Failure(Error::ParameterInOptional(e4)), + ] => { + assert_eq!(*e1, "{nest}"); + assert_eq!(*e2, "{nest}"); + assert_eq!(*e3, "{nest}"); + assert_eq!(*e4, "{nest}"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_alternation() { + let err = [ + optional(Spanned::new("(/)")).expect_err("error"), + optional(Spanned::new("(bef/)")).expect_err("error"), + optional(Spanned::new("(/aft)")).expect_err("error"), + optional(Spanned::new("(bef/aft)")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::AlternationInOptional(e1)), + Err::Failure(Error::AlternationInOptional(e2)), + 
Err::Failure(Error::AlternationInOptional(e3)), + Err::Failure(Error::AlternationInOptional(e4)), + ] => { + assert_eq!(*e1, "/"); + assert_eq!(*e2, "/"); + assert_eq!(*e3, "/"); + assert_eq!(*e4, "/"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_unescaped_reserved_char() { + let err = [ + optional(Spanned::new("({opt)")).expect_err("error"), + optional(Spanned::new("({n{e}st})")).expect_err("error"), + optional(Spanned::new("((nest)")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::UnescapedReservedCharacter(e1)), + Err::Failure(Error::UnescapedReservedCharacter(e2)), + Err::Failure(Error::UnescapedReservedCharacter(e3)), + ] => { + assert_eq!(*e1, "{"); + assert_eq!(*e2, "{"); + assert_eq!(*e3, "("); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_unfinished() { + let err = [ + optional(Spanned::new("(")).expect_err("error"), + optional(Spanned::new("(name ")).expect_err("error"), + ]; + + match err { + #[rustfmt::skip] + [ + Err::Failure(Error::UnfinishedOptional(e1)), + Err::Failure(Error::UnfinishedOptional(e2)) + ] => { + assert_eq!(*e1, "("); + assert_eq!(*e2, "("); + } + _ => panic!("wrong error: {:?}", err), + } + } + } + + mod alternative { + use super::{ + alternative, unwrap_parser, Alternative, Err, Error, ErrorKind, + Spanned, + }; + + #[allow(clippy::non_ascii_literal)] + #[test] + fn text() { + match ( + unwrap_parser(alternative(Spanned::new("string"))), + unwrap_parser(alternative(Spanned::new("🦀"))), + ) { + (Alternative::Text(t1), Alternative::Text(t2)) => { + assert_eq!(*t1, "string"); + assert_eq!(*t2, "🦀"); + } + _ => { + panic!("expected Alternative::Text") + } + } + } + + #[test] + fn escaped_spaces() { + match ( + unwrap_parser(alternative(Spanned::new("bef\\ "))), + unwrap_parser(alternative(Spanned::new("\\ aft"))), + unwrap_parser(alternative(Spanned::new("bef\\ aft"))), + ) { + ( + Alternative::Text(t1), + Alternative::Text(t2), + 
Alternative::Text(t3), + ) => { + assert_eq!(*t1, "bef\\ "); + assert_eq!(*t2, "\\ aft"); + assert_eq!(*t3, "bef\\ aft"); + } + _ => { + panic!("expected Alternative::Text") + } + } + } + + #[test] + fn optional() { + match unwrap_parser(alternative(Spanned::new("(opt)"))) { + Alternative::Optional(t) => { + assert_eq!(**t, "opt"); + } + Alternative::Text(_) => { + panic!("expected Alternative::Optional") + } + } + } + + #[test] + fn not_captures_unescaped_whitespace() { + match alternative(Spanned::new("text ")) { + Ok((rest, matched)) => { + assert_eq!(*rest, " "); + + match matched { + Alternative::Text(t) => assert_eq!(*t, "text"), + Alternative::Optional(_) => { + panic!("expected Alternative::Text") + } + } + } + Err(..) => panic!("expected ok"), + } + } + + #[test] + fn errors_on_empty() { + match alternative(Spanned::new("")).unwrap_err() { + Err::Error(Error::Other(_, ErrorKind::Alt)) => {} + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn fails_on_unfinished_optional() { + let err = ( + alternative(Spanned::new("(")).unwrap_err(), + alternative(Spanned::new("(opt")).unwrap_err(), + ); + + match err { + ( + Err::Failure(Error::UnfinishedOptional(e1)), + Err::Failure(Error::UnfinishedOptional(e2)), + ) => { + assert_eq!(*e1, "("); + assert_eq!(*e2, "("); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_escaped_non_reserved() { + let err = ( + alternative(Spanned::new("(\\r)")).unwrap_err(), + alternative(Spanned::new("\\r")).unwrap_err(), + ); + + match err { + ( + Err::Failure(Error::EscapedNonReservedCharacter(e1)), + Err::Failure(Error::EscapedNonReservedCharacter(e2)), + ) => { + assert_eq!(*e1, "\\"); + assert_eq!(*e2, "\\"); + } + _ => panic!("wrong error: {:?}", err), + } + } + } + + mod alternation { + use super::{ + alternation, eq, unwrap_parser, Err, Error, ErrorKind, Spanned, + }; + + #[allow(clippy::non_ascii_literal)] + #[test] + fn 
basic() { + let ast = format!( + "{:?}", + unwrap_parser(alternation(Spanned::new("l/🦀"))) + ); + + eq( + ast, + r#"Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "l", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 2, + line: 1, + fragment: "🦀", + extra: () + } + ) + ] + ] + )"#, + ); + } + + #[test] + fn with_optionals() { + let ast = format!( + "{:?}", + unwrap_parser(alternation(Spanned::new( + "l(opt)/(opt)r/l(opt)r" + ))), + ); + + eq( + ast, + r#"Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "l", + extra: () + } + ), + Optional ( + Optional ( + LocatedSpan { + offset: 2, + line: 1, + fragment: "opt", + extra: () + } + ) + ) + ], + [ + Optional ( + Optional ( + LocatedSpan { + offset: 8, + line: 1, + fragment: "opt", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 12, + line: 1, + fragment: "r", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 14, + line: 1, + fragment: "l", + extra: () + } + ), + Optional ( + Optional ( + LocatedSpan { + offset: 16, + line: 1, + fragment: "opt", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 20, + line: 1, + fragment: "r", + extra: () + } + ) + ] + ] + )"#, + ); + } + + #[allow(clippy::too_many_lines)] + #[test] + fn with_more_optionals() { + let ast = format!( + "{:?}", + unwrap_parser(alternation(Spanned::new( + "l(opt)(opt)/(opt)(opt)r/(opt)m(opt)" + ))), + ); + + eq( + ast, + r#"Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "l", + extra: () + } + ), + Optional ( + Optional ( + LocatedSpan { + offset: 2, + line: 1, + fragment: "opt", + extra: () + } + ) + ), + Optional ( + Optional ( + LocatedSpan { + offset: 7, + line: 1, + fragment: "opt", + extra: () + } + ) + ) + ], + [ + Optional ( + Optional ( + LocatedSpan { + offset: 13, + line: 1, + fragment: "opt", + extra: () + } + ) + ), + Optional ( + Optional ( + LocatedSpan { + offset: 18, + line: 1, + 
fragment: "opt", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 22, + line: 1, + fragment: "r", + extra: () + } + ) + ], + [ + Optional ( + Optional ( + LocatedSpan { + offset: 25, + line: 1, + fragment: "opt", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 29, + line: 1, + fragment: "m", + extra: () + } + ), + Optional ( + Optional ( + LocatedSpan { + offset: 31, + line: 1, + fragment: "opt", + extra: () + } + ) + ) + ] + ] + )"#, + ); + } + + #[test] + fn errors_without_slash() { + match ( + alternation(Spanned::new("")).unwrap_err(), + alternation(Spanned::new("{par}")).unwrap_err(), + alternation(Spanned::new("text")).unwrap_err(), + alternation(Spanned::new("(opt)")).unwrap_err(), + ) { + ( + Err::Error(Error::Other(_, ErrorKind::Many1)), + Err::Error(Error::Other(_, ErrorKind::Many1)), + Err::Error(Error::Other(_, ErrorKind::Tag)), + Err::Error(Error::Other(_, ErrorKind::Tag)), + ) => {} + _ => panic!("wrong err"), + } + } + + #[test] + fn fails_on_empty_alternation() { + let err = ( + alternation(Spanned::new("/")).unwrap_err(), + alternation(Spanned::new("l/")).unwrap_err(), + alternation(Spanned::new("/r")).unwrap_err(), + alternation(Spanned::new("l/m/")).unwrap_err(), + alternation(Spanned::new("l//r")).unwrap_err(), + alternation(Spanned::new("/m/r")).unwrap_err(), + ); + + match err { + ( + Err::Failure(Error::EmptyAlternation(e1)), + Err::Failure(Error::EmptyAlternation(e2)), + Err::Failure(Error::EmptyAlternation(e3)), + Err::Failure(Error::EmptyAlternation(e4)), + Err::Failure(Error::EmptyAlternation(e5)), + Err::Failure(Error::EmptyAlternation(e6)), + ) => { + assert_eq!(*e1, "/"); + assert_eq!(*e2, "/"); + assert_eq!(*e3, "/"); + assert_eq!(*e4, "/"); + assert_eq!(*e5, "/"); + assert_eq!(*e6, "/"); + } + _ => panic!("wrong error: {:?}", err), + } + } + + #[test] + fn fails_on_only_optional() { + let err = ( + alternation(Spanned::new("text/(opt)")).unwrap_err(), + alternation(Spanned::new("text/(opt)(opt)")).unwrap_err(), 
+ alternation(Spanned::new("(opt)/text")).unwrap_err(), + alternation(Spanned::new("(opt)/(opt)")).unwrap_err(), + ); + + match err { + ( + Err::Failure(Error::OnlyOptionalInAlternation(e1)), + Err::Failure(Error::OnlyOptionalInAlternation(e2)), + Err::Failure(Error::OnlyOptionalInAlternation(e3)), + Err::Failure(Error::OnlyOptionalInAlternation(e4)), + ) => { + assert_eq!(*e1, "text/(opt)"); + assert_eq!(*e2, "text/(opt)(opt)"); + assert_eq!(*e3, "(opt)/text"); + assert_eq!(*e4, "(opt)/(opt)"); + } + _ => panic!("wrong error: {:?}", err), + } + } + } + + // all test examples from: https://bit.ly/3q6m53v + mod expression { + use super::{eq, expression, unwrap_parser, Err, Error, Spanned}; + + #[test] + fn allows_escaped_optional_parameter_types() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new("\\({int})"))) + ); + eq( + ast, + r#"Expression ( + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "\\(", + extra: () + } + ), + Parameter ( + Parameter ( + LocatedSpan { + offset: 3, + line: 1, + fragment: "int", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 7, + line: 1, + fragment: ")", + extra: () + } + ) + ] + )"#, + ); + } + + #[test] + fn allows_parameter_type_in_alternation() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new("a/i{int}n/y"))) + ); + eq( + ast, + r#"Expression( + [ + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "a", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 2, + line: 1, + fragment: "i", + extra: () + } + ) + ] + ] + ) + ), + Parameter ( + Parameter ( + LocatedSpan { + offset: 4, + line: 1, + fragment: "int", + extra: () + } + ) + ), + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 8, + line: 1, + fragment: "n", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 10, + line: 1, + fragment: "y", + extra: () + } + ) + ] + ] + ) + ) + ] + )"#, + ); + } + + #[test] + 
fn does_allow_parameter_adjacent_to_alternation() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new("{int}st/nd/rd/th"))) + ); + eq( + ast, + r#"Expression ( + [ + Parameter ( + Parameter ( + LocatedSpan { + offset: 1, + line: 1, + fragment: "int", + extra: () + } + ) + ), + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 5, + line: 1, + fragment: + "st", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 8, + line: 1, + fragment: "nd", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 11, + line: 1, + fragment: "rd", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 14, + line: 1, + fragment: "th", + extra: () + } + ) + ] + ] + ) + ) + ] + )"#, + ); + } + + #[test] + fn does_not_allow_alternation_in_optional() { + match expression(Spanned::new("three( brown/black) mice")) + .unwrap_err() + { + Err::Failure(Error::AlternationInOptional(s)) => { + assert_eq!(*s, "/"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[rustfmt::skip] + #[test] + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_left_parameter() { + match expression(Spanned::new("{int}/x")).unwrap_err() { + Err::Failure(Error::EmptyAlternation(s)) => { + assert_eq!(*s, "/"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => panic!("wrong error: {:?}", e), + } + } + + #[rustfmt::skip] + #[test] + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_optional() { + match expression(Spanned::new("three (brown)/black mice")).unwrap_err() { + Err::Failure(Error::OnlyOptionalInAlternation(s)) => { + assert_eq!(*s, "(brown)/black"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => panic!("wrong error: {:?}", e), + } + } + + #[rustfmt::skip] + #[test] + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_right_parameter() { + match 
expression(Spanned::new("x/{int}")).unwrap_err() { + Err::Failure(Error::EmptyAlternation(s)) => { + assert_eq!(*s, "/"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => panic!("wrong error: {:?}", e), + } + } + + #[test] + fn does_not_allow_alternation_with_empty_alternative() { + match expression(Spanned::new("three brown//black mice")) + .unwrap_err() + { + Err::Failure(Error::EmptyAlternation(s)) => { + assert_eq!(*s, "/"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_empty_optional() { + match expression(Spanned::new("three () mice")).unwrap_err() { + Err::Failure(Error::EmptyOptional(s)) => { + assert_eq!(*s, "()"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_nested_optional() { + match expression(Spanned::new("(a(b))")).unwrap_err() { + Err::Failure(Error::NestedOptional(s)) => { + assert_eq!(*s, "(b)"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_optional_parameter_types() { + match expression(Spanned::new("({int})")).unwrap_err() { + Err::Failure(Error::ParameterInOptional(s)) => { + assert_eq!(*s, "{int}"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_parameter_name_with_reserved_characters() { + match expression(Spanned::new("{(string)}")).unwrap_err() { + Err::Failure(Error::OptionalInParameter(s)) => { + assert_eq!(*s, "(string)"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_unfinished_parenthesis_1() { + match expression(Spanned::new( + "three (exceptionally\\) {string\\} mice", + )) + .unwrap_err() + { + 
Err::Failure(Error::UnescapedReservedCharacter(s)) => { + assert_eq!(*s, "{"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_unfinished_parenthesis_2() { + match expression(Spanned::new( + "three (exceptionally\\) {string} mice", + )) + .unwrap_err() + { + Err::Failure(Error::ParameterInOptional(s)) => { + assert_eq!(*s, "{string}"); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn does_not_allow_unfinished_parenthesis_3() { + match expression(Spanned::new( + "three ((exceptionally\\) strong) mice", + )) + .unwrap_err() + { + Err::Failure(Error::UnescapedReservedCharacter(s)) => { + assert_eq!(*s, "("); + } + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e) + } + } + } + + #[test] + fn matches_alternation() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new( + "mice/rats and rats\\/mice" + ))) + ); + eq( + ast, + r#"Expression ( + [ + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "mice", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 5, + line: 1, + fragment: "rats", + extra: () + } + ) + ] + ] + ) + ), + Whitespace, + Text ( + LocatedSpan { + offset: 10, + line: 1, + fragment: "and", + extra: () + } + ), + Whitespace, + Text ( + LocatedSpan { + offset: 14, + line: 1, + fragment: "rats\\/mice", + extra: () + } + ) + ] + )"#, + ); + } + + #[test] + fn matches_anonymous_parameter_type() { + let ast = + format!("{:?}", unwrap_parser(expression(Spanned::new("{}")))); + eq( + ast, + r#"Expression ( + [ + Parameter ( + Parameter ( + LocatedSpan { + offset: 1, + line: 1, + fragment: "", + extra: () + } + ) + ) + ] + )"#, + ); + } + + #[test] + fn matches_doubly_escaped_parenthesis() { + let ast = format!( + "{:?}", + 
unwrap_parser(expression(Spanned::new( + "three \\(exceptionally) \\{string} mice" + ))) + ); + eq( + ast, + r#"Expression ( + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "three", + extra: () + } + ), + Whitespace, + Text ( + LocatedSpan { + offset: 6, + line: 1, + fragment: "\\(exceptionally)", + extra: () + } + ), + Whitespace, + Text ( + LocatedSpan { + offset: 23, + line: 1, + fragment: "\\{string}", + extra: () + } + ), + Whitespace, + Text ( + LocatedSpan { + offset: 33, + line: 1, + fragment: "mice", + extra: () + } + ) + ] + )"#, + ); + } + + #[test] + fn matches_doubly_escaped_slash() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new("12\\\\/2020"))) + ); + eq( + ast, + r#"Expression ( + [ + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "12\\\\", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 5, + line: 1, + fragment: "2020", + extra: () + } + ) + ] + ] + ) + ) + ] + )"#, + ); + } + + #[test] + fn matches_optional_before_alternation() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new( + "three (brown )mice/rats" + ))) + ); + eq( + ast, + r#"Expression ( + [ + Text ( + LocatedSpan { + offset: 0, + line: 1, + fragment: "three", + extra: () + } + ), + Whitespace, + Alternation ( + Alternation ( + [ + [ + Optional ( + Optional ( + LocatedSpan { + offset: 7, + line: 1, + fragment: "brown", + extra: () + } + ) + ), + Text ( + LocatedSpan { + offset: 14, + line: 1, + fragment: "mice", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 19, + line: 1, + fragment: "rats", + extra: () + } + ) + ] + ] + ) + ) + ] + )"#, + ); + } + + #[test] + fn matches_optional_in_alternation() { + let ast = format!( + "{:?}", + unwrap_parser(expression(Spanned::new( + "{int} rat(s)/mouse/mice" + ))) + ); + eq( + ast, + r#"Expression ( + [ + Parameter ( + Parameter ( + LocatedSpan { + offset: 1, + line: 1, + fragment: "int", + extra: () + 
} + ) + ), + Whitespace, + Alternation ( + Alternation ( + [ + [ + Text ( + LocatedSpan { + offset: 6, + line: 1, + fragment: "rat", + extra: () + } + ), + Optional ( + Optional ( + LocatedSpan { + offset: 10, + line: 1, + fragment: "s", + extra: () + } + ) + ) + ], + [ + Text ( + LocatedSpan { + offset: 13, + line: 1, + fragment: "mouse", + extra: () + } + ) + ], + [ + Text ( + LocatedSpan { + offset: 19, + line: 1, + fragment: "mice", + extra: () + } + ) + ] + ] + ) + ) + ] + )"#, + ); + } + + #[test] + fn empty() { + let ast = + format!("{:?}", unwrap_parser(expression(Spanned::new("")))); + eq(ast, r#"Expression([])"#); + } + } +} From 1032997a2479bb3dec6742ab463dc4ff73e3c317 Mon Sep 17 00:00:00 2001 From: ilslv Date: Thu, 18 Nov 2021 14:53:43 +0300 Subject: [PATCH 2/4] Parse multiple whitespaces, not 1-by-1 --- README.md | 38 +++ src/ast.rs | 44 +-- src/combinator.rs | 4 +- src/parse.rs | 789 ++++++++++++++++++++++++++-------------------- 4 files changed, 504 insertions(+), 371 deletions(-) diff --git a/README.md b/README.md index 7b9e623..a60072b 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,42 @@ This crate provides [AST] and parser of [Cucumber Expressions]. +## Grammar + +This implementation follows a context-free grammar, [which isn't yet merged][1]. The original grammar is impossible to follow while creating a performant parser, as it contains errors and describes not `Cucumber Expressions` exactly, but rather some superset language, while also being context-sensitive. In case you've found some inconsistencies between this implementation and the ones in other languages, please file an issue! 
+ +[EBNF] spec +```ebnf +expression = single-expression* + +single-expression = alternation + | optional + | parameter + | text-without-whitespace+ + | whitespace +text-without-whitespace = (- (text-to-escape | whitespace)) + | ('\', text-to-escape) +text-to-escape = '(' | '{' | '/' | '\' + +alternation = single-alternation, (`/`, single-alternation)+ +single-alternation = ((text-in-alternative+, optional*) + | (optional+, text-in-alternative+))+ +text-in-alternative = (- alternative-to-escape) + | ('\', alternative-to-escape) +alternative-to-escape = ' ' | '(' | '{' | '/' | '\' + +optional = '(' text-in-optional+ ')' +text-in-optional = (- optional-to-escape) | ('\', optional-to-escape) +optional-to-escape = '(' | ')' | '{' | '/' | '\' + +parameter = '{', name*, '}' +name = (- name-to-escape) | ('\', name-to-escape) +name-to-escape = '{' | '}' | '(' | '/' | '\' +``` + + + + ## License This project is licensed under either of @@ -27,5 +63,7 @@ at your option. +[1]: https://github.com/cucumber/cucumber-expressions/issues/41 [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree [Cucumber Expressions]: https://github.com/cucumber/cucumber-expressions#readme +[EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form diff --git a/src/ast.rs b/src/ast.rs index 522b4ec..d146e94 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,7 +14,7 @@ //! //! [1]: https://github.com/cucumber/cucumber-expressions#readme //! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree -//! [3]: https://tinyurl.com/cucumber-expr-spec#grammar +//! [3]: crate#grammar use derive_more::{AsRef, Deref, DerefMut}; use nom::{error::ErrorKind, Err, InputLength}; @@ -25,11 +25,11 @@ use crate::parse; /// [`str`] along with its location information in the original string. pub type Spanned<'s> = LocatedSpan<&'s str>; -/// Top-level [`cucumber-expression`][3]. +/// Top-level [`cucumber-expression`][1]. /// /// See [`parse::expression()`] for the detailed grammar and examples. 
/// -/// [3]: https://tinyurl.com/cucumber-expr-spec#grammar +/// [1]: crate#grammar #[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Expression(pub Vec>); @@ -56,44 +56,44 @@ impl<'s> Expression> { /// # Errors /// /// See [`parse::Error`] for details. - pub fn parse>( + pub fn parse + ?Sized>( input: &'s I, ) -> Result>> { Self::try_from(input.as_ref()) } } -/// Single entry of a [`cucumber-expression`][3]. +/// Single entry of a [`cucumber-expression`][1]. /// /// See [`parse::single_expression()`] for the detailed grammar and examples. /// -/// [3]: https://tinyurl.com/cucumber-expr-spec#grammar +/// [1]: crate#grammar #[derive(Clone, Debug, Eq, PartialEq)] pub enum SingleExpression { - /// [`alternation`][3] expression. + /// [`alternation`][1] expression. /// - /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + /// [1]: crate#grammar Alternation(Alternation), - /// [`optional`][3] expression. + /// [`optional`][1] expression. /// - /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + /// [1]: crate#grammar Optional(Optional), - /// [`parameter`][3] expression. + /// [`parameter`][1] expression. /// - /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + /// [1]: crate#grammar Parameter(Parameter), /// Text without whitespaces. Text(Input), - /// Whitespaces are treated as a special case to avoid lookaheads and - /// lookbehinds described in the [architecture][1]. This allows parsing to - /// have `O(n)` complexity. + /// Whitespaces are treated as a special case to avoid placing every `text` + /// character in separate [AST] node, as described in [grammar spec]. /// - /// [1]: https://tinyurl.com/cucumber-expr-spec - Whitespace, + /// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree + /// [grammar spec]: crate#grammar + Whitespaces(Input), } /// Allows to match one of [`SingleAlternation`]s. 
@@ -102,7 +102,7 @@ pub enum SingleExpression { #[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Alternation(pub Vec>); -/// Building block an [`Alternation`]. +/// Building block of an [`Alternation`]. pub type SingleAlternation = Vec>; impl Alternation { @@ -135,14 +135,16 @@ impl Alternation { } } -/// [`alternative`][3] expression. +/// [`alternative`][1] expression. /// /// See [`parse::alternative()`] for the detailed grammar and examples. +/// +/// [1]: crate#grammar #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Alternative { - /// [`optional`][3] expression. + /// [`optional`][1] expression. /// - /// [3]: https://tinyurl.com/cucumber-expr-spec#grammar + /// [1]: crate#grammar Optional(Optional), /// Text. diff --git a/src/combinator.rs b/src/combinator.rs index 4db258c..d6ffa14 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -41,7 +41,7 @@ where /// 2. If `normal` matched empty sequence and then `escapable` didn't match /// anything, returns empty sequence; /// 3. Errors with [`ErrorKind::Escaped`] if `control_char` was followed by a -/// non-`escapable` `Input`. +/// non-`escapable` `Input` or end of line. /// /// [`escaped()`]: nom::bytes::complete::escaped() pub(crate) fn escaped0<'a, Input: 'a, Error, F, G, O1, O2>( @@ -277,7 +277,7 @@ mod escaped0_spec { } #[test] - fn errors_on_control_char() { + fn errors_on_ending_with_control_char() { assert_eq!( get_result("\\"), ( diff --git a/src/parse.rs b/src/parse.rs index b918699..548a485 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -14,14 +14,14 @@ //! //! [1]: https://github.com/cucumber/cucumber-expressions#readme //! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree -//! [3]: https://tinyurl.com/cucumber-expr-spec#grammar +//! 
[3]: crate#grammar use std::{fmt::Display, ops::RangeFrom}; use derive_more::{Display, Error}; use nom::{ branch::alt, - bytes::complete::{tag, take_while}, + bytes::complete::{tag, take_while, take_while1}, character::complete::one_of, combinator::{map, peek, verify}, error::{ErrorKind, ParseError}, @@ -55,9 +55,11 @@ pub const RESERVED_CHARS: &str = r#"{}()\/ "#; /// ## Irrecoverable [`Failure`] /// /// - If `normal` parser fails +/// - [`EscapedEndOfLine`] /// - [`EscapedNonReservedCharacter`] /// /// [`Error`]: Err::Error +/// [`EscapedEndOfLine`]: Error::EscapedEndOfLine /// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter /// [`Failure`]: Err::Failure fn escaped_reserved_chars0<'a, Input: 'a, F, O1>( @@ -81,10 +83,14 @@ where combinator::escaped0(normal, '\\', one_of(RESERVED_CHARS)), |e| { if let Err::Error(Error::Other(span, ErrorKind::Escaped)) = e { - let span = (span.input_len() > 0) - .then(|| span.take(1)) - .unwrap_or(span); - Error::EscapedNonReservedCharacter(span).failure() + match span.input_len() { + 1 => Error::EscapedEndOfLine(span), + n if n > 1 => { + Error::EscapedNonReservedCharacter(span.take(2)) + } + _ => Error::EscapedNonReservedCharacter(span), + } + .failure() } else { e } @@ -94,10 +100,11 @@ where /// # Syntax /// -/// ```text -/// parameter := '{' (name | '\' name_to_escape)* '}' -/// name := ^name_to_escape -/// name_to_escape := '{' | '}' | '(' | '/' | '\' +/// [EBNF] grammar. 
+/// ```ebnf +/// parameter = '{', name*, '}' +/// name = (- name-to-escape) | ('\', name-to-escape) +/// name-to-escape = '{' | '}' | '(' | '/' | '\' /// ``` /// /// # Example @@ -125,6 +132,7 @@ where /// - [`UnescapedReservedCharacter`] /// - [`UnfinishedParameter`] /// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter @@ -199,10 +207,11 @@ where /// # Syntax /// -/// ```text -/// optional := '(' (text_in_optional | '\' optional_to_escape)+ ')' -/// text_in_optional := ^optional_to_escape -/// optional_to_escape := '(' | ')' | '{' | '/' | '\' +/// [EBNF] grammar. +/// ```ebnf +/// optional = '(' text-in-optional+ ')' +/// text-in-optional = (- optional-to-escape) | ('\', optional-to-escape) +/// optional-to-escape = '(' | ')' | '{' | '/' | '\' /// ``` /// /// # Example @@ -225,16 +234,19 @@ where /// /// - [`AlternationInOptional`] /// - [`EmptyOptional`] +/// - [`EscapedEndOfLine`] /// - [`EscapedNonReservedCharacter`] /// - [`NestedOptional`] /// - [`ParameterInOptional`] /// - [`UnescapedReservedCharacter`] /// - [`UnfinishedOptional`] /// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`AlternationInOptional`]: Error::AlternationInOptional /// [`EmptyOptional`]: Error::EmptyOptional +/// [`EscapedEndOfLine`]: Error::EscapedEndOfLine /// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter /// [`NestedOptional`]: Error::NestedOptional /// [`ParameterInOptional`]: Error::ParameterInOptional @@ -315,12 +327,12 @@ where /// # Syntax /// -/// ```text -/// alternative := optional -/// | (text_without_whitespace -/// | '\' whitespace_and_special)+ -/// text_without_whitespace := ^whitespace_and_special -/// whitespace_and_special := ' ' | '(' | '{' | '/' | '\' +/// [EBNF] grammar. 
+/// ```ebnf +/// alternative = optional | (text-in-alternative+) +/// text-in-alternative = (- alternative-to-escape) +/// | ('\', alternative-to-escape) +/// alternative-to-escape = ' ' | '(' | '{' | '/' | '\' /// ``` /// /// # Example @@ -339,6 +351,7 @@ where /// /// Any [`Failure`] of [`optional()`]. /// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Failure`]: Err::Failure pub fn alternative<'a, Input: 'a>( input: Input, @@ -373,10 +386,11 @@ where /// # Grammar /// -/// ```text -/// alternation := single_alternation (`/` single_alternation)+ -/// single_alternation := ((text_without_whitespace+ optional*) -/// | (optional+ text_without_whitespace+))+ +/// [EBNF] grammar +/// ```ebnf +/// alternation = single-alternation, (`/`, single-alternation)+ +/// single-alternation = ((text-in-alternative+, optional*) +/// | (optional+, text-in-alternative+))+ /// ``` /// /// # Example @@ -401,6 +415,7 @@ where /// - [`EmptyAlternation`] /// - [`OnlyOptionalInAlternation`] /// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EmptyAlternation`]: Error::EmptyAlternation @@ -456,12 +471,16 @@ where /// # Syntax /// -/// ```text -/// single_expression := alternation -/// | optional -/// | parameter -/// | text_without_whitespace+ -/// | whitespace +/// [EBNF] grammar. +/// ```ebnf +/// single-expression = alternation +/// | optional +/// | parameter +/// | text-without-whitespace+ +/// | whitespace +/// text-without-whitespace = (- (text-to-escape | whitespace)) +/// | ('\', text-to-escape) +/// text-to-escape = '(' | '{' | '/' | '\' /// ``` /// /// # Example @@ -479,6 +498,7 @@ where /// /// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. 
/// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EmptyAlternation`]: Error::EmptyAlternation @@ -501,6 +521,7 @@ where for<'s> &'s str: FindToken<::Item>, { let is_text_without_whitespace = |c| !" ({\\/".contains(c); + let is_whitespace = |c| c == ' '; alt(( map(alternation, SingleExpression::Alternation), @@ -513,14 +534,15 @@ where ), SingleExpression::Text, ), - map(tag(" "), |_| SingleExpression::Whitespace), + map(take_while1(is_whitespace), SingleExpression::Whitespaces), ))(input) } /// # Syntax /// -/// ```text -/// expression := single_expression* +/// [EBNF] grammar. +/// ```ebnf +/// expression = single-expression* /// ``` /// /// # Example @@ -540,6 +562,7 @@ where /// /// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. /// +/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EmptyAlternation`]: Error::EmptyAlternation @@ -565,7 +588,7 @@ where } /// Possible parsing errors. -#[derive(Debug, Display, Error, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Display, Error, Eq, PartialEq)] pub enum Error where Input: Display, @@ -702,6 +725,16 @@ where )] EscapedNonReservedCharacter(#[error(not(source))] Input), + /// Escaped EOL. + #[display( + fmt = "\ + {}\n\ + The end of line can not be escaped. + You can use '\\' to escape the the '\'.", + _0 + )] + EscapedEndOfLine(#[error(not(source))] Input), + /// Unknown error. 
#[display( fmt = "\ @@ -751,21 +784,23 @@ mod spec { use nom::{error::ErrorKind, Err, IResult}; use crate::{ - parse::{alternation, alternative, expression, optional, parameter}, - Alternative, Error, Spanned, + parse::{ + alternation, alternative, expression, optional, parameter, Error, + }, + Alternative, Spanned, }; fn eq(left: impl AsRef, right: impl AsRef) { assert_eq!( left.as_ref() - .replace(' ', "") - .replace('\n', "") - .replace('\t', ""), + .lines() + .map(|line| line.trim_start().trim_end_matches('\n')) + .collect::(), right .as_ref() - .replace(' ', "") - .replace('\n', "") - .replace('\t', ""), + .lines() + .map(|line| line.trim_end_matches('\n').trim()) + .collect::(), ); } @@ -839,7 +874,7 @@ mod spec { match err { Err::Failure(Error::EscapedNonReservedCharacter(e)) => { - assert_eq!(*e, "\\"); + assert_eq!(*e, "\\r"); } Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => { panic!("wrong error: {:?}", err) @@ -1017,7 +1052,7 @@ mod spec { match err { Err::Failure(Error::EscapedNonReservedCharacter(e)) => { - assert_eq!(*e, "\\"); + assert_eq!(*e, "\\r"); } Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => { panic!("wrong error: {:?}", err) @@ -1262,8 +1297,8 @@ mod spec { Err::Failure(Error::EscapedNonReservedCharacter(e1)), Err::Failure(Error::EscapedNonReservedCharacter(e2)), ) => { - assert_eq!(*e1, "\\"); - assert_eq!(*e2, "\\"); + assert_eq!(*e1, "\\r"); + assert_eq!(*e2, "\\r"); } _ => panic!("wrong error: {:?}", err), } @@ -1279,35 +1314,35 @@ mod spec { #[test] fn basic() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(alternation(Spanned::new("l/🦀"))) ); eq( ast, - r#"Alternation ( + r#"Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "l", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 2, line: 1, fragment: "🦀", - extra: () - } - ) - ] - ] + extra: (), + }, + ), + ], + ], )"#, ); } @@ -1315,7 +1350,7 @@ mod spec { #[test] fn with_optionals() { let ast = 
format!( - "{:?}", + "{:#?}", unwrap_parser(alternation(Spanned::new( "l(opt)/(opt)r/l(opt)r" ))), @@ -1323,77 +1358,77 @@ mod spec { eq( ast, - r#"Alternation ( + r#"Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "l", - extra: () - } + extra: (), + }, ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 2, line: 1, fragment: "opt", - extra: () - } - ) - ) + extra: (), + }, + ), + ), ], [ - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 8, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 12, line: 1, fragment: "r", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 14, line: 1, fragment: "l", - extra: () - } + extra: (), + }, ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 16, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 20, line: 1, fragment: "r", - extra: () - } - ) - ] - ] + extra: (), + }, + ), + ], + ], )"#, ); } @@ -1402,7 +1437,7 @@ mod spec { #[test] fn with_more_optionals() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(alternation(Spanned::new( "l(opt)(opt)/(opt)(opt)r/(opt)m(opt)" ))), @@ -1410,99 +1445,99 @@ mod spec { eq( ast, - r#"Alternation ( + r#"Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "l", - extra: () - } + extra: (), + }, ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 2, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 7, line: 1, fragment: "opt", - extra: () - } - ) - ) + extra: (), + }, + ), + ), ], [ - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 13, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { 
offset: 18, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 22, line: 1, fragment: "r", - extra: () - } - ) + extra: (), + }, + ), ], [ - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 25, line: 1, fragment: "opt", - extra: () - } - ) + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 29, line: 1, fragment: "m", - extra: () - } + extra: (), + }, ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 31, line: 1, fragment: "opt", - extra: () - } - ) - ) - ] - ] + extra: (), + }, + ), + ), + ], + ], )"#, ); } @@ -1589,40 +1624,40 @@ mod spec { #[test] fn allows_escaped_optional_parameter_types() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new("\\({int})"))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "\\(", - extra: () - } + extra: (), + }, ), - Parameter ( - Parameter ( + Parameter( + Parameter( LocatedSpan { offset: 3, line: 1, fragment: "int", - extra: () - } - ) + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 7, line: 1, fragment: ")", - extra: () - } - ) - ] + extra: (), + }, + ), + ], )"#, ); } @@ -1630,76 +1665,76 @@ mod spec { #[test] fn allows_parameter_type_in_alternation() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new("a/i{int}n/y"))) ); eq( ast, r#"Expression( [ - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "a", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 2, line: 1, fragment: "i", - extra: () - } - ) - ] - ] - ) + extra: (), + }, + ), + ], + ], + ), ), - Parameter ( - Parameter ( + Parameter( + Parameter( LocatedSpan { offset: 4, line: 1, fragment: "int", - extra: () - } - ) + extra: (), + }, + ), ), - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - 
Text ( + Text( LocatedSpan { offset: 8, line: 1, fragment: "n", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 10, line: 1, fragment: "y", - extra: () - } - ) - ] - ] - ) - ) - ] + extra: (), + }, + ), + ], + ], + ), + ), + ], )"#, ); } @@ -1707,71 +1742,70 @@ mod spec { #[test] fn does_allow_parameter_adjacent_to_alternation() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new("{int}st/nd/rd/th"))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Parameter ( - Parameter ( + Parameter( + Parameter( LocatedSpan { offset: 1, line: 1, fragment: "int", - extra: () - } - ) + extra: (), + }, + ), ), - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 5, line: 1, - fragment: - "st", - extra: () - } - ) + fragment: "st", + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 8, line: 1, fragment: "nd", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 11, line: 1, fragment: "rd", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 14, line: 1, fragment: "th", - extra: () - } - ) - ] - ] - ) - ) - ] + extra: (), + }, + ), + ], + ], + ), + ), + ], )"#, ); } @@ -1936,60 +1970,74 @@ mod spec { #[test] fn matches_alternation() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new( "mice/rats and rats\\/mice" ))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "mice", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 5, line: 1, fragment: "rats", - extra: () - } - ) - ] - ] - ) + extra: (), + }, + ), + ], + ], + ), ), - Whitespace, - Text ( + Whitespaces( + LocatedSpan { + offset: 9, + line: 1, + fragment: " ", + extra: (), + }, + ), + Text( LocatedSpan { offset: 10, line: 1, fragment: 
"and", - extra: () - } + extra: (), + }, ), - Whitespace, - Text ( + Whitespaces( + LocatedSpan { + offset: 13, + line: 1, + fragment: " ", + extra: (), + }, + ), + Text( LocatedSpan { offset: 14, line: 1, fragment: "rats\\/mice", - extra: () - } - ) - ] + extra: (), + }, + ), + ], )"#, ); } @@ -1997,22 +2045,22 @@ mod spec { #[test] fn matches_anonymous_parameter_type() { let ast = - format!("{:?}", unwrap_parser(expression(Spanned::new("{}")))); + format!("{:#?}", unwrap_parser(expression(Spanned::new("{}")))); eq( ast, - r#"Expression ( + r#"Expression( [ - Parameter ( - Parameter ( + Parameter( + Parameter( LocatedSpan { offset: 1, line: 1, fragment: "", - extra: () - } - ) - ) - ] + extra: (), + }, + ), + ), + ], )"#, ); } @@ -2020,51 +2068,72 @@ mod spec { #[test] fn matches_doubly_escaped_parenthesis() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new( "three \\(exceptionally) \\{string} mice" ))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "three", - extra: () - } + extra: (), + }, + ), + Whitespaces( + LocatedSpan { + offset: 5, + line: 1, + fragment: " ", + extra: (), + }, ), - Whitespace, - Text ( + Text( LocatedSpan { offset: 6, line: 1, fragment: "\\(exceptionally)", - extra: () - } + extra: (), + }, ), - Whitespace, - Text ( + Whitespaces( + LocatedSpan { + offset: 22, + line: 1, + fragment: " ", + extra: (), + }, + ), + Text( LocatedSpan { offset: 23, line: 1, fragment: "\\{string}", - extra: () - } + extra: (), + }, ), - Whitespace, - Text ( + Whitespaces( + LocatedSpan { + offset: 32, + line: 1, + fragment: " ", + extra: (), + }, + ), + Text( LocatedSpan { offset: 33, line: 1, fragment: "mice", - extra: () - } - ) - ] + extra: (), + }, + ), + ], )"#, ); } @@ -2072,40 +2141,40 @@ mod spec { #[test] fn matches_doubly_escaped_slash() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new("12\\\\/2020"))) ); eq( ast, - r#"Expression 
( + r#"Expression( [ - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "12\\\\", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 5, line: 1, fragment: "2020", - extra: () - } - ) - ] - ] - ) - ) - ] + extra: (), + }, + ), + ], + ], + ), + ), + ], )"#, ); } @@ -2113,61 +2182,68 @@ mod spec { #[test] fn matches_optional_before_alternation() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new( "three (brown )mice/rats" ))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Text ( + Text( LocatedSpan { offset: 0, line: 1, fragment: "three", - extra: () - } + extra: (), + }, + ), + Whitespaces( + LocatedSpan { + offset: 5, + line: 1, + fragment: " ", + extra: (), + }, ), - Whitespace, - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 7, line: 1, - fragment: "brown", - extra: () - } - ) + fragment: "brown ", + extra: (), + }, + ), ), - Text ( + Text( LocatedSpan { offset: 14, line: 1, fragment: "mice", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 19, line: 1, fragment: "rats", - extra: () - } - ) - ] - ] - ) - ) - ] + extra: (), + }, + ), + ], + ], + ), + ), + ], )"#, ); } @@ -2175,77 +2251,94 @@ mod spec { #[test] fn matches_optional_in_alternation() { let ast = format!( - "{:?}", + "{:#?}", unwrap_parser(expression(Spanned::new( "{int} rat(s)/mouse/mice" ))) ); eq( ast, - r#"Expression ( + r#"Expression( [ - Parameter ( - Parameter ( + Parameter( + Parameter( LocatedSpan { offset: 1, line: 1, fragment: "int", - extra: () - } - ) + extra: (), + }, + ), + ), + Whitespaces( + LocatedSpan { + offset: 5, + line: 1, + fragment: " ", + extra: (), + }, ), - Whitespace, - Alternation ( - Alternation ( + Alternation( + Alternation( [ [ - Text ( + Text( LocatedSpan { offset: 6, line: 1, fragment: "rat", - 
extra: () - } + extra: (), + }, ), - Optional ( - Optional ( + Optional( + Optional( LocatedSpan { offset: 10, line: 1, fragment: "s", - extra: () - } - ) - ) + extra: (), + }, + ), + ), ], [ - Text ( + Text( LocatedSpan { offset: 13, line: 1, fragment: "mouse", - extra: () - } - ) + extra: (), + }, + ), ], [ - Text ( + Text( LocatedSpan { offset: 19, line: 1, fragment: "mice", - extra: () - } - ) - ] - ] - ) - ) - ] + extra: (), + }, + ), + ], + ], + ), + ), + ], )"#, ); } + #[test] + fn err_on_escaped_end_of_line() { + match expression(Spanned::new("\\")).unwrap_err() { + Err::Failure(Error::EscapedEndOfLine(_)) => {} + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong err: {}", e) + } + } + } + #[test] fn empty() { let ast = From fbd08e270d2ddab098e8c65a7e0b22d2fa9c7757 Mon Sep 17 00:00:00 2001 From: ilslv Date: Thu, 18 Nov 2021 14:59:30 +0300 Subject: [PATCH 3/4] Hack around MSRV CI job and derive_more disliking each other --- Cargo.toml | 3 +++ src/lib.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7b0df8e..1568a7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,3 +21,6 @@ include = ["/src/", "/LICENSE-*", "/README.md", "/CHANGELOG.md"] derive_more = { version = "0.99.16", features = ["as_ref", "deref", "deref_mut", "display", "error"], default_features = false } nom = "7.0" nom_locate = "4.0" + +# TODO: remove once https://github.com/JelteF/derive_more/pull/175 is merged. +syn = "1.0.81" diff --git a/src/lib.rs b/src/lib.rs index eb6acba..26d9f0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,6 +100,9 @@ pub mod ast; mod combinator; pub mod parse; +// TODO: remove once https://github.com/JelteF/derive_more/pull/175 is merged. 
+use syn as _; + #[doc(inline)] pub use self::{ ast::{ From 71b462735335488c571a5ab6673f055756aa99a9 Mon Sep 17 00:00:00 2001 From: tyranron Date: Thu, 18 Nov 2021 17:38:25 +0100 Subject: [PATCH 4/4] Corrections --- Cargo.toml | 2 +- README.md | 7 +- src/ast.rs | 84 +++-- src/combinator.rs | 37 +- src/lib.rs | 2 +- src/parse.rs | 864 ++++++++++++++++++---------------------------- 6 files changed, 413 insertions(+), 583 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1568a7c..e1893b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,5 +22,5 @@ derive_more = { version = "0.99.16", features = ["as_ref", "deref", "deref_mut", nom = "7.0" nom_locate = "4.0" -# TODO: remove once https://github.com/JelteF/derive_more/pull/175 is merged. +# TODO: Remove once `derive_more` 0.99.17 is released. syn = "1.0.81" diff --git a/README.md b/README.md index a60072b..4897856 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,9 @@ This crate provides [AST] and parser of [Cucumber Expressions]. ## Grammar -This implementation follows context-free grammar, [which isn't yet merged][1]. Original grammar is impossible to follow while creating performant parser, as it consists errors and describes not `Cucumber Expressions`, but some superset language and context-sensitive. In case you've found some inconsistencies between implementations in other languages, please file an issue! +This implementation follows a context-free grammar, [which isn't yet merged][1]. The original grammar is impossible to follow while creating a performant parser, as it contains errors and describes not an exact [Cucumber Expressions] language, but rather some superset language, while also being context-sensitive. In case you've found some inconsistencies between this implementation and the ones in other languages, please file an issue! -[EBNF] spec +[EBNF] spec of the current context-free grammar implemented by this crate: ```ebnf expression = single-expression* @@ -63,7 +63,8 @@ at your option. 
-[1]: https://github.com/cucumber/cucumber-expressions/issues/41 [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree [Cucumber Expressions]: https://github.com/cucumber/cucumber-expressions#readme [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form + +[1]: https://github.com/cucumber/cucumber-expressions/issues/41 diff --git a/src/ast.rs b/src/ast.rs index d146e94..3455c55 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -8,13 +8,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! [Cucumber Expressions][1] [AST][2] definitions. +//! [Cucumber Expressions][1] [AST]. //! -//! See details in the [grammar spec][3]. +//! See details in the [grammar spec][0]. //! +//! [0]: crate#grammar //! [1]: https://github.com/cucumber/cucumber-expressions#readme -//! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree -//! [3]: crate#grammar +//! [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree use derive_more::{AsRef, Deref, DerefMut}; use nom::{error::ErrorKind, Err, InputLength}; @@ -22,14 +22,14 @@ use nom_locate::LocatedSpan; use crate::parse; -/// [`str`] along with its location information in the original string. +/// [`str`] along with its location information in the original input. pub type Spanned<'s> = LocatedSpan<&'s str>; -/// Top-level [`cucumber-expression`][1]. +/// Top-level `expression` defined in the [grammar spec][0]. /// /// See [`parse::expression()`] for the detailed grammar and examples. /// -/// [1]: crate#grammar +/// [0]: crate#grammar #[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Expression(pub Vec>); @@ -63,50 +63,58 @@ impl<'s> Expression> { } } -/// Single entry of a [`cucumber-expression`][1]. +/// `single-expression` defined in the [grammar spec][0], representing a single +/// entry of an [`Expression`]. /// /// See [`parse::single_expression()`] for the detailed grammar and examples. 
/// -/// [1]: crate#grammar +/// [0]: crate#grammar #[derive(Clone, Debug, Eq, PartialEq)] pub enum SingleExpression { - /// [`alternation`][1] expression. + /// [`alternation`][0] expression. /// - /// [1]: crate#grammar + /// [0]: crate#grammar Alternation(Alternation), - /// [`optional`][1] expression. + /// [`optional`][0] expression. /// - /// [1]: crate#grammar + /// [0]: crate#grammar Optional(Optional), - /// [`parameter`][1] expression. + /// [`parameter`][0] expression. /// - /// [1]: crate#grammar + /// [0]: crate#grammar Parameter(Parameter), /// Text without whitespaces. Text(Input), /// Whitespaces are treated as a special case to avoid placing every `text` - /// character in separate [AST] node, as described in [grammar spec]. + /// character in a separate [AST] node, as described in the + /// [grammar spec][0]. /// + /// [0]: crate#grammar /// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree - /// [grammar spec]: crate#grammar Whitespaces(Input), } -/// Allows to match one of [`SingleAlternation`]s. +/// `single-alternation` defined in the [grammar spec][0], representing a +/// building block of an [`Alternation`]. +/// +/// [0]: crate#grammar +pub type SingleAlternation = Vec>; + +/// `alternation` defined in the [grammar spec][0], allowing to match one of +/// [`SingleAlternation`]s. +/// +/// See [`parse::alternation()`] for the detailed grammar and examples. /// -/// See [`parse::alternation()`] for detailed syntax and examples. +/// [0]: crate#grammar #[derive(AsRef, Clone, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Alternation(pub Vec>); -/// Building block of an [`Alternation`]. -pub type SingleAlternation = Vec>; - impl Alternation { - /// Returns length of capture from `Input`. + /// Returns length of this [`Alternation`]'s span in the `Input`. 
pub(crate) fn span_len(&self) -> usize { self.0 .iter() @@ -120,26 +128,22 @@ impl Alternation { - 1 } - /// Indicates whether one of [`SingleAlternation`]s consists only from + /// Indicates whether any of [`SingleAlternation`]s consists only from /// [`Optional`]s. pub(crate) fn contains_only_optional(&self) -> bool { - for single_alt in &**self { - if single_alt + (**self).iter().any(|single_alt| { + single_alt .iter() .all(|alt| matches!(alt, Alternative::Optional(_))) - { - return true; - } - } - false + }) } } -/// [`alternative`][1] expression. +/// `alternative` defined in the [grammar spec][0]. /// /// See [`parse::alternative()`] for the detailed grammar and examples. /// -/// [1]: crate#grammar +/// [0]: crate#grammar #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Alternative { /// [`optional`][1] expression. @@ -151,14 +155,20 @@ pub enum Alternative { Text(Input), } -/// Allows to match optional `Input`. +/// `optional` defined in the [grammar spec][0], allowing to match an optional +/// `Input`. +/// +/// See [`parse::optional()`] for the detailed grammar and examples. /// -/// See [`parse::optional()`] for detailed syntax and examples. +/// [0]: crate#grammar #[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Optional(pub Input); -/// Allows to match some special `Input` descried by a [`Parameter`] name. +/// `parameter` defined in the [grammar spec][0], allowing to match some special +/// `Input` described by a [`Parameter`] name. +/// +/// See [`parse::parameter()`] for the detailed grammar and examples. /// -/// See [`parse::parameter()`] for detailed syntax and examples. 
+/// [0]: crate#grammar #[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)] pub struct Parameter(pub Input); diff --git a/src/combinator.rs b/src/combinator.rs index d6ffa14..40f8070 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -18,29 +18,32 @@ use nom::{ InputTakeAtPosition, Offset, Parser, Slice, }; -/// Applies `map` to `parser`s [`IResult`] in case it errored. +/// Applies the given `map` function to the `parser`'s [`IResult`] in case it +/// represents an error. /// -/// Can be used to harden [`Error`] to [`Failure`]. +/// Can be used to harden an [`Error`] into a [`Failure`]. /// /// [`Error`]: nom::Err::Error /// [`Failure`]: nom::Err::Failure /// [`verify()`]: nom::combinator::verify() pub(crate) fn map_err, F, G>( mut parser: F, - map: G, + mut map: G, ) -> impl FnMut(I) -> IResult where F: Parser, - G: Fn(Err) -> Err, + G: FnMut(Err) -> Err, { - move |input: I| parser.parse(input).map_err(&map) + move |input: I| parser.parse(input).map_err(&mut map) } +/// Matches a byte string with escaped characters. +/// /// Differences from [`escaped()`]: -/// 1. If `normal` matched empty sequence, tries to matched escaped; +/// 1. If `normal` matched empty sequence, tries to match escaped; /// 2. If `normal` matched empty sequence and then `escapable` didn't match -/// anything, returns empty sequence; -/// 3. Errors with [`ErrorKind::Escaped`] if `control_char` was followed by a +/// anything, returns an empty sequence; +/// 3. Errors with [`ErrorKind::Escaped`] if a `control_char` was followed by a /// non-`escapable` `Input` or end of line. /// /// [`escaped()`]: nom::bytes::complete::escaped() @@ -146,7 +149,7 @@ mod escaped0_spec { /// Type used to compare behaviour of [`escaped`] and [`escaped0`]. 
/// - /// Tuple is constructed from following parsers results: + /// Tuple is constructed from the following parsers results: /// - [`escaped0`]`(`[`digit0`]`, '\\', `[`one_of`]`(r#""n\"#))` /// - [`escaped0`]`(`[`digit1`]`, '\\', `[`one_of`]`(r#""n\"#))` /// - [`escaped`]`(`[`digit0`]`, '\\', `[`one_of`]`(r#""n\"#))` @@ -158,7 +161,7 @@ mod escaped0_spec { IResult<&'s str, &'s str>, ); - /// Produces [`TestResult`] from `input`. + /// Produces a [`TestResult`] from the given `input`. fn get_result(input: &str) -> TestResult<'_> { ( escaped0(digit0, '\\', one_of(r#""n\"#))(input), @@ -261,16 +264,16 @@ mod escaped0_spec { ( Err(Err::Error(Error { input: r#"\r"#, - code: ErrorKind::Escaped + code: ErrorKind::Escaped, })), Err(Err::Error(Error { input: r#"\r"#, - code: ErrorKind::Escaped + code: ErrorKind::Escaped, })), Ok((r#"\n\r"#, "")), Err(Err::Error(Error { input: r#"r"#, - code: ErrorKind::OneOf + code: ErrorKind::OneOf, })), ), ); @@ -283,17 +286,17 @@ mod escaped0_spec { ( Err(Err::Error(Error { input: "\\", - code: ErrorKind::Escaped + code: ErrorKind::Escaped, })), Err(Err::Error(Error { input: "\\", - code: ErrorKind::Escaped + code: ErrorKind::Escaped, })), Ok(("\\", "")), Err(Err::Error(Error { input: "\\", - code: ErrorKind::Escaped - })) + code: ErrorKind::Escaped, + })), ), ); } diff --git a/src/lib.rs b/src/lib.rs index 26d9f0e..006ecd1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,7 +100,7 @@ pub mod ast; mod combinator; pub mod parse; -// TODO: remove once https://github.com/JelteF/derive_more/pull/175 is merged. +// TODO: Remove once `derive_more` 0.99.17 is released. use syn as _; #[doc(inline)] diff --git a/src/parse.rs b/src/parse.rs index 548a485..29e5ebf 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -8,13 +8,13 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! [Cucumber Expressions][1] [AST][2] parser. +//! [Cucumber Expressions][1] [AST] parser. //! -//! 
See details in the [grammar spec][3]. +//! See details in the [grammar spec][0]. //! +//! [0]: crate#grammar //! [1]: https://github.com/cucumber/cucumber-expressions#readme -//! [2]: https://en.wikipedia.org/wiki/Abstract_syntax_tree -//! [3]: crate#grammar +//! [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree use std::{fmt::Display, ops::RangeFrom}; @@ -39,7 +39,7 @@ use crate::{ combinator, }; -/// Reserved characters requiring special handling. +/// Reserved characters requiring a special handling. pub const RESERVED_CHARS: &str = r#"{}()\/ "#; /// Matches `normal` and [`RESERVED_CHARS`] escaped with `\`. @@ -54,9 +54,9 @@ pub const RESERVED_CHARS: &str = r#"{}()\/ "#; /// /// ## Irrecoverable [`Failure`] /// -/// - If `normal` parser fails -/// - [`EscapedEndOfLine`] -/// - [`EscapedNonReservedCharacter`] +/// - If `normal` parser fails. +/// - [`EscapedEndOfLine`]. +/// - [`EscapedNonReservedCharacter`]. /// /// [`Error`]: Err::Error /// [`EscapedEndOfLine`]: Error::EscapedEndOfLine @@ -98,9 +98,10 @@ where ) } -/// # Syntax +/// Parses a `parameter` as defined in the [grammar spec][0]. +/// +/// # Grammar /// -/// [EBNF] grammar. /// ```ebnf /// parameter = '{', name*, '}' /// name = (- name-to-escape) | ('\', name-to-escape) @@ -120,19 +121,18 @@ where /// /// # Errors /// -/// ## Recoverable [`Error`]s +/// ## Recoverable [`Error`] /// -/// - If `input` doesn't start with `{` +/// - If `input` doesn't start with `{`. /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// -/// - [`EscapedNonReservedCharacter`] -/// - [`NestedParameter`] -/// - [`OptionalInParameter`] -/// - [`UnescapedReservedCharacter`] -/// - [`UnfinishedParameter`] +/// - [`EscapedNonReservedCharacter`]. +/// - [`NestedParameter`]. +/// - [`OptionalInParameter`]. +/// - [`UnescapedReservedCharacter`]. +/// - [`UnfinishedParameter`]. 
/// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EscapedNonReservedCharacter`]: Error::EscapedNonReservedCharacter @@ -140,6 +140,7 @@ where /// [`OptionalInParameter`]: Error::OptionalInParameter /// [`UnescapedReservedCharacter`]: Error::UnescapedReservedCharacter /// [`UnfinishedParameter`]: Error::UnfinishedParameter +/// [0]: crate#grammar pub fn parameter<'a, Input: 'a>( input: Input, ) -> IResult, Error> @@ -205,9 +206,10 @@ where Ok((input, Parameter(par_name))) } -/// # Syntax +/// Parses an `optional` as defined in the [grammar spec][0]. +/// +/// # Grammar /// -/// [EBNF] grammar. /// ```ebnf /// optional = '(' text-in-optional+ ')' /// text-in-optional = (- optional-to-escape) | ('\', optional-to-escape) @@ -226,22 +228,21 @@ where /// /// # Errors /// -/// ## Recoverable [`Error`]s +/// ## Recoverable [`Error`] /// -/// - If `input` doesn't start with `(` +/// - If `input` doesn't start with `(`. /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// -/// - [`AlternationInOptional`] -/// - [`EmptyOptional`] -/// - [`EscapedEndOfLine`] -/// - [`EscapedNonReservedCharacter`] -/// - [`NestedOptional`] -/// - [`ParameterInOptional`] -/// - [`UnescapedReservedCharacter`] -/// - [`UnfinishedOptional`] +/// - [`AlternationInOptional`]. +/// - [`EmptyOptional`]. +/// - [`EscapedEndOfLine`]. +/// - [`EscapedNonReservedCharacter`]. +/// - [`NestedOptional`]. +/// - [`ParameterInOptional`]. +/// - [`UnescapedReservedCharacter`]. +/// - [`UnfinishedOptional`]. 
/// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`AlternationInOptional`]: Error::AlternationInOptional @@ -252,6 +253,7 @@ where /// [`ParameterInOptional`]: Error::ParameterInOptional /// [`UnescapedReservedCharacter`]: Error::UnescapedReservedCharacter /// [`UnfinishedOptional`]: Error::UnfinishedOptional +/// [0]: crate#grammar pub fn optional<'a, Input: 'a>( input: Input, ) -> IResult, Error> @@ -269,14 +271,14 @@ where Error: ParseError, for<'s> &'s str: FindToken<::Item>, { - let is_text_in_optional = |c| !"(){\\/".contains(c); + let is_in_optional = |c| !"(){\\/".contains(c); let fail = |input: Input, opening_brace| { match input.iter_elements().next().map(AsChar::as_char) { Some('(') => { if let Ok((_, (opt, ..))) = peek(tuple(( optional, - escaped_reserved_chars0(take_while(is_text_in_optional)), + escaped_reserved_chars0(take_while(is_in_optional)), tag(")"), )))(input.clone()) { @@ -313,7 +315,7 @@ where let original_input = input.clone(); let (input, opening_paren) = tag("(")(input)?; let (input, opt) = - escaped_reserved_chars0(take_while(is_text_in_optional))(input)?; + escaped_reserved_chars0(take_while(is_in_optional))(input)?; let (input, _) = combinator::map_err(tag(")"), |_| { fail(input.clone(), opening_paren.clone()) })(input.clone())?; @@ -325,9 +327,10 @@ where Ok((input, Optional(opt))) } -/// # Syntax +/// Parses an `alternative` as defined in the [grammar spec][0]. +/// +/// # Grammar /// -/// [EBNF] grammar. /// ```ebnf /// alternative = optional | (text-in-alternative+) /// text-in-alternative = (- alternative-to-escape) @@ -347,12 +350,12 @@ where /// /// # Errors /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// /// Any [`Failure`] of [`optional()`]. 
/// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Failure`]: Err::Failure +/// [0]: crate#grammar pub fn alternative<'a, Input: 'a>( input: Input, ) -> IResult, Error> @@ -370,13 +373,13 @@ where Error: ParseError, for<'s> &'s str: FindToken<::Item>, { - let is_text_without_whitespace = |c| !" ({\\/".contains(c); + let is_without_whitespace = |c| !" ({\\/".contains(c); alt(( map(optional, Alternative::Optional), map( verify( - escaped_reserved_chars0(take_while(is_text_without_whitespace)), + escaped_reserved_chars0(take_while(is_without_whitespace)), |p| p.input_len() > 0, ), Alternative::Text, @@ -384,9 +387,10 @@ where ))(input) } +/// Parses an `alternation` as defined in the [grammar spec][0]. +/// /// # Grammar /// -/// [EBNF] grammar /// ```ebnf /// alternation = single-alternation, (`/`, single-alternation)+ /// single-alternation = ((text-in-alternative+, optional*) @@ -405,21 +409,21 @@ where /// /// # Errors /// -/// ## Recoverable [`Error`]s +/// ## Recoverable [`Error`] /// -/// - If `input` doesn't have `/` +/// - If `input` doesn't have `/`. /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// -/// - Any [`Failure`] of [`optional()`] -/// - [`EmptyAlternation`] -/// - [`OnlyOptionalInAlternation`] +/// - Any [`Failure`] of [`optional()`]. +/// - [`EmptyAlternation`]. +/// - [`OnlyOptionalInAlternation`]. /// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form /// [`Error`]: Err::Error /// [`Failure`]: Err::Failure /// [`EmptyAlternation`]: Error::EmptyAlternation /// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +/// [0]: crate#grammar pub fn alternation( input: Input, ) -> IResult, Error> @@ -469,9 +473,10 @@ where .unwrap_or(Ok((rest, alt))) } -/// # Syntax +/// Parses a `single-expression` as defined in the [grammar spec][0]. +/// +/// # Grammar /// -/// [EBNF] grammar. 
/// ```ebnf /// single-expression = alternation /// | optional @@ -494,15 +499,12 @@ where /// /// # Errors /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// /// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. /// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form -/// [`Error`]: Err::Error /// [`Failure`]: Err::Failure -/// [`EmptyAlternation`]: Error::EmptyAlternation -/// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +/// [0]: crate#grammar pub fn single_expression<'a, Input: 'a>( input: Input, ) -> IResult, Error> @@ -520,7 +522,7 @@ where Error: ParseError, for<'s> &'s str: FindToken<::Item>, { - let is_text_without_whitespace = |c| !" ({\\/".contains(c); + let is_without_whitespace = |c| !" ({\\/".contains(c); let is_whitespace = |c| c == ' '; alt(( @@ -529,7 +531,7 @@ where map(parameter, SingleExpression::Parameter), map( verify( - escaped_reserved_chars0(take_while(is_text_without_whitespace)), + escaped_reserved_chars0(take_while(is_without_whitespace)), |s| s.input_len() > 0, ), SingleExpression::Text, @@ -538,9 +540,10 @@ where ))(input) } -/// # Syntax +/// Parses an `expression` as defined in the [grammar spec][0]. +/// +/// # Grammar /// -/// [EBNF] grammar. /// ```ebnf /// expression = single-expression* /// ``` @@ -554,19 +557,16 @@ where /// text /// ``` /// -/// Note: empty string is matched too. +/// > __NOTE:__ Empty string is matched too. /// /// # Errors /// -/// ## Irrecoverable [`Failure`]s +/// ## Irrecoverable [`Failure`] /// /// Any [`Failure`] of [`alternation()`], [`optional()`] or [`parameter()`]. 
/// -/// [EBNF]: https://en.wikipedia.org/wiki/Extended_Backus–Naur_form -/// [`Error`]: Err::Error /// [`Failure`]: Err::Failure -/// [`EmptyAlternation`]: Error::EmptyAlternation -/// [`OnlyOptionalInAlternation`]: Error::OnlyOptionalInAlternation +/// [0]: crate#grammar pub fn expression<'a, Input: 'a>( input: Input, ) -> IResult, Error> @@ -595,151 +595,137 @@ where { /// Nested [`Parameter`]s. #[display( - fmt = "\ - {}\n\ - A parameter may not contain an other parameter.\n\ - If you did not mean to use an optional type you can use '\\{{' to \ - escape the '{{'. For more complicated expressions consider using a \ - regular expression instead.", + fmt = "{}\n\ + A parameter may not contain an other parameter.\n\ + If you did not mean to use an optional type you can use '\\{{' \ + to escape the '{{'. For more complicated expressions consider \ + using a regular expression instead.", _0 )] NestedParameter(#[error(not(source))] Input), - /// [`Optional`] inside [`Parameter`]. + /// [`Optional`] inside a [`Parameter`]. #[display( - fmt = "\ - {}\n\ - A parameter may not contain an optional type.\n\ - If you did not mean to use an parameter type you can use '\\(' to \ - escape the '('.", + fmt = "{}\n\ + A parameter may not contain an optional.\n\ + If you did not mean to use an parameter type you can use '\\(' \ + to escape the '('.", _0 )] OptionalInParameter(#[error(not(source))] Input), /// Unfinished [`Parameter`]. #[display( - fmt = "\ - {}\n\ - The '{{' does not have a matching '}}'.\n\ - If you did not intend to use a parameter you can use '\\{{' to escape \ - the '{{'.", + fmt = "{}\n\ + The '{{' does not have a matching '}}'.\n\ + If you did not intend to use a parameter you can use '\\{{' to \ + escape the '{{'.", _0 )] UnfinishedParameter(#[error(not(source))] Input), /// Nested [`Optional`]. 
#[display( - fmt = "\ - {}\n\ - An optional may not contain an other optional.\n\ - If you did not mean to use an optional type you can use '\\(' to \ - escape the '('. For more complicated expressions consider using a \ - regular expression instead.", + fmt = "{}\n\ + An optional may not contain an other optional.\n\ + If you did not mean to use an optional type you can use '\\(' \ + to escape the '('. For more complicated expressions consider \ + using a regular expression instead.", _0 )] NestedOptional(#[error(not(source))] Input), - /// [`Parameter`] inside [`Optional`]. + /// [`Parameter`] inside an [`Optional`]. #[display( - fmt = "\ - {}\n\ - An optional may not contain a parameter type.\n\ - If you did not mean to use an parameter type you can use '\\{{' to \ - escape the '{{'.", + fmt = "{}\n\ + An optional may not contain a parameter.\n\ + If you did not mean to use an parameter type you can use \ + '\\{{' to escape the '{{'.", _0 )] ParameterInOptional(#[error(not(source))] Input), /// Empty [`Optional`]. #[display( - fmt = "\ - {}\n\ - An optional must contain some text.\n\ - If you did not mean to use an optional you can use '\\(' to escape the \ - '('.", + fmt = "{}\n\ + An optional must contain some text.\n\ + If you did not mean to use an optional you can use '\\(' to \ + escape the '('.", _0 )] EmptyOptional(#[error(not(source))] Input), - /// [`Alternation`] inside [`Optional`]. + /// [`Alternation`] inside an [`Optional`]. #[display( - fmt = "\ - {}\n\ - An alternation can not be used inside an optional.\n\ - You can use '\\/' to escape the '/'.", + fmt = "{}\n\ + An alternation can not be used inside an optional.\n\ + You can use '\\/' to escape the '/'.", _0 )] AlternationInOptional(#[error(not(source))] Input), /// Unfinished [`Optional`]. 
#[display( - fmt = "\ - {}\n\ - The '(' does not have a matching ')'.\n\ - If you did not intend to use an optional you can use '\\(' to escape \ - the '('.", + fmt = "{}\n\ + The '(' does not have a matching ')'.\n\ + If you did not intend to use an optional you can use '\\(' to \ + escape the '('.", _0 )] UnfinishedOptional(#[error(not(source))] Input), /// Empty [`Alternation`]. #[display( - fmt = "\ - {}\n\ - Alternative may not be empty.\n\ - If you did not mean to use an alternative you can use '\\/' to escape \ - the '/'.", + fmt = "{}\n\ + An alternation can not be empty.\n\ + If you did not mean to use an alternative you can use '\\/' to \ + escape the '/'.", _0 )] EmptyAlternation(#[error(not(source))] Input), /// Only [`Optional`] inside [`Alternation`]. #[display( - fmt = "\ - {}\n\ - An alternative may not exclusively contain optionals.\n\ - If you did not mean to use an optional you can use '\\(' to escape the \ - '('.", + fmt = "{}\n\ + An alternation may not exclusively contain optionals.\n\ + If you did not mean to use an optional you can use '\\(' to \ + escape the '('.", _0 )] OnlyOptionalInAlternation(#[error(not(source))] Input), /// Unescaped [`RESERVED_CHARS`]. #[display( - fmt = "\ - {}\n\ - Unescaped reserved character.\n\ - You can use an '\\' to escape it.", + fmt = "{}\n\ + Unescaped reserved character.\n\ + You can use an '\\' to escape it.", _0 )] UnescapedReservedCharacter(#[error(not(source))] Input), /// Escaped non-[`RESERVED_CHARS`]. #[display( - fmt = "\ - {}\n\ - Only the characters '{{', '}}', '(', ')', '\\', '/' and whitespace can \ - be escaped.\n\ - If you did mean to use an '\\' you can use '\\\\' to escape it.", + fmt = "{}\n\ + Only the characters '{{', '}}', '(', ')', '\\', '/' and \ + whitespace can be escaped.\n\ + If you did mean to use an '\\' you can use '\\\\' to escape it.", _0 )] EscapedNonReservedCharacter(#[error(not(source))] Input), /// Escaped EOL. 
#[display( - fmt = "\ - {}\n\ - The end of line can not be escaped. - You can use '\\' to escape the the '\'.", + fmt = "{}\n\ + The end of line can not be escaped.\n\ + You can use '\\' to escape the the '\'.", _0 )] EscapedEndOfLine(#[error(not(source))] Input), /// Unknown error. #[display( - fmt = "\ - {}\n\ - Unknown parsing error.", + fmt = "{}\n\ + Unknown parsing error.", _0 )] Other(#[error(not(source))] Input, ErrorKind), @@ -750,13 +736,13 @@ where "match _0 {\ Needed::Size(n) => format!(\"Parsing requires {} bytes/chars\", n),\ Needed::Unknown => \"Parsing requires more data\".to_owned(),\ - }" + }" )] Needed(#[error(not(source))] Needed), } impl Error { - /// Converts this [`Error`] into [`Failure`]. + /// Converts this [`Error`] into a [`Failure`]. /// /// [`Error`]: enum@Error /// [`Failure`]: Err::Failure @@ -781,6 +767,8 @@ impl ParseError for Error { #[cfg(test)] mod spec { + use std::fmt; + use nom::{error::ErrorKind, Err, IResult}; use crate::{ @@ -790,13 +778,16 @@ mod spec { Alternative, Spanned, }; - fn eq(left: impl AsRef, right: impl AsRef) { + /// Asserts two given text representations of [AST] to be equal. + /// + /// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree + fn assert_ast_eq(actual: impl fmt::Debug, expected: impl AsRef) { assert_eq!( - left.as_ref() + format!("{:#?}", actual) .lines() .map(|line| line.trim_start().trim_end_matches('\n')) .collect::(), - right + expected .as_ref() .lines() .map(|line| line.trim_end_matches('\n').trim()) @@ -804,11 +795,12 @@ mod spec { ); } + /// Unwraps the given `parser` result asserting it has finished and succeed. 
fn unwrap_parser<'s, T>( - par: IResult, T, Error>>, + parser: IResult, T, Error>>, ) -> T { let (rest, par) = - par.unwrap_or_else(|e| panic!("Expected Ok, found Err: {}", e)); + parser.unwrap_or_else(|e| panic!("Expected Ok, found Err: {}", e)); assert_eq!(*rest, ""); par } @@ -856,12 +848,13 @@ mod spec { #[allow(clippy::non_ascii_literal)] #[test] fn named_with_emoji() { - assert_eq!(**unwrap_parser(parameter(Spanned::new("{🦀}"))), "🦀",); + assert_eq!(**unwrap_parser(parameter(Spanned::new("{🦀}"))), "🦀"); } #[test] fn errors_on_empty() { let span = Spanned::new(""); + assert_eq!( parameter(span), Err(Err::Error(Error::Other(span, ErrorKind::Tag))), @@ -877,106 +870,71 @@ mod spec { assert_eq!(*e, "\\r"); } Err::Incomplete(_) | Err::Error(_) | Err::Failure(_) => { - panic!("wrong error: {:?}", err) + panic!("wrong error: {:?}", err); } } } #[test] fn fails_on_nested() { - let err = [ - parameter(Spanned::new("{{nest}}")).expect_err("error"), - parameter(Spanned::new("{before{nest}}")).expect_err("error"), - parameter(Spanned::new("{{nest}after}")).expect_err("error"), - parameter(Spanned::new("{bef{nest}aft}")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::NestedParameter(e1)), - Err::Failure(Error::NestedParameter(e2)), - Err::Failure(Error::NestedParameter(e3)), - Err::Failure(Error::NestedParameter(e4)), - ] => { - assert_eq!(*e1, "{nest}"); - assert_eq!(*e2, "{nest}"); - assert_eq!(*e3, "{nest}"); - assert_eq!(*e4, "{nest}"); + for input in [ + "{{nest}}", + "{before{nest}}", + "{{nest}after}", + "{bef{nest}aft}", + ] { + match parameter(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::NestedParameter(e)) => { + assert_eq!(*e, "{nest}", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_optional() { - let err = [ - parameter(Spanned::new("{(nest)}")).expect_err("error"), - 
parameter(Spanned::new("{before(nest)}")).expect_err("error"), - parameter(Spanned::new("{(nest)after}")).expect_err("error"), - parameter(Spanned::new("{bef(nest)aft}")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::OptionalInParameter(e1)), - Err::Failure(Error::OptionalInParameter(e2)), - Err::Failure(Error::OptionalInParameter(e3)), - Err::Failure(Error::OptionalInParameter(e4)), - ] => { - assert_eq!(*e1, "(nest)"); - assert_eq!(*e2, "(nest)"); - assert_eq!(*e3, "(nest)"); - assert_eq!(*e4, "(nest)"); + for input in [ + "{(nest)}", + "{before(nest)}", + "{(nest)after}", + "{bef(nest)aft}", + ] { + match parameter(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::OptionalInParameter(e)) => { + assert_eq!(*e, "(nest)", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_unescaped_reserved_char() { - let err = [ - parameter(Spanned::new("{(opt}")).expect_err("error"), - parameter(Spanned::new("{(n(e)st)}")).expect_err("error"), - parameter(Spanned::new("{{nest}")).expect_err("error"), - parameter(Spanned::new("{l/r}")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::UnescapedReservedCharacter(e1)), - Err::Failure(Error::UnescapedReservedCharacter(e2)), - Err::Failure(Error::UnescapedReservedCharacter(e3)), - Err::Failure(Error::UnescapedReservedCharacter(e4)), - ] => { - assert_eq!(*e1, "("); - assert_eq!(*e2, "("); - assert_eq!(*e3, "{"); - assert_eq!(*e4, "/"); + for (input, expected) in [ + ("{(opt}", "("), + ("{(n(e)st)}", "("), + ("{{nest}", "{"), + ("{l/r}", "/"), + ] { + match parameter(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::UnescapedReservedCharacter(e)) => { + assert_eq!(*e, expected, "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_unfinished() { - let err = [ - 
parameter(Spanned::new("{")).expect_err("error"), - parameter(Spanned::new("{name ")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::UnfinishedParameter(e1)), - Err::Failure(Error::UnfinishedParameter(e2)) - ] => { - assert_eq!(*e1, "{"); - assert_eq!(*e2, "{"); + for input in ["{", "{name "] { + match parameter(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::UnfinishedParameter(e)) => { + assert_eq!(*e, "{", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } } @@ -1062,122 +1020,73 @@ mod spec { #[test] fn fails_on_nested() { - let err = [ - optional(Spanned::new("((nest))")).expect_err("error"), - optional(Spanned::new("(before(nest))")).expect_err("error"), - optional(Spanned::new("((nest)after)")).expect_err("error"), - optional(Spanned::new("(bef(nest)aft)")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::NestedOptional(e1)), - Err::Failure(Error::NestedOptional(e2)), - Err::Failure(Error::NestedOptional(e3)), - Err::Failure(Error::NestedOptional(e4)), - ] => { - assert_eq!(*e1, "(nest)"); - assert_eq!(*e2, "(nest)"); - assert_eq!(*e3, "(nest)"); - assert_eq!(*e4, "(nest)"); + for input in [ + "((nest))", + "(before(nest))", + "((nest)after)", + "(bef(nest)aft)", + ] { + match optional(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::NestedOptional(e)) => { + assert_eq!(*e, "(nest)", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_parameter() { - let err = [ - optional(Spanned::new("({nest})")).expect_err("error"), - optional(Spanned::new("(before{nest})")).expect_err("error"), - optional(Spanned::new("({nest}after)")).expect_err("error"), - optional(Spanned::new("(bef{nest}aft)")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::ParameterInOptional(e1)), - 
Err::Failure(Error::ParameterInOptional(e2)), - Err::Failure(Error::ParameterInOptional(e3)), - Err::Failure(Error::ParameterInOptional(e4)), - ] => { - assert_eq!(*e1, "{nest}"); - assert_eq!(*e2, "{nest}"); - assert_eq!(*e3, "{nest}"); - assert_eq!(*e4, "{nest}"); + for input in [ + "({nest})", + "(before{nest})", + "({nest}after)", + "(bef{nest}aft)", + ] { + match optional(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::ParameterInOptional(e)) => { + assert_eq!(*e, "{nest}", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_alternation() { - let err = [ - optional(Spanned::new("(/)")).expect_err("error"), - optional(Spanned::new("(bef/)")).expect_err("error"), - optional(Spanned::new("(/aft)")).expect_err("error"), - optional(Spanned::new("(bef/aft)")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::AlternationInOptional(e1)), - Err::Failure(Error::AlternationInOptional(e2)), - Err::Failure(Error::AlternationInOptional(e3)), - Err::Failure(Error::AlternationInOptional(e4)), - ] => { - assert_eq!(*e1, "/"); - assert_eq!(*e2, "/"); - assert_eq!(*e3, "/"); - assert_eq!(*e4, "/"); + for input in ["(/)", "(bef/)", "(/aft)", "(bef/aft)"] { + match optional(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::AlternationInOptional(e)) => { + assert_eq!(*e, "/", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_unescaped_reserved_char() { - let err = [ - optional(Spanned::new("({opt)")).expect_err("error"), - optional(Spanned::new("({n{e}st})")).expect_err("error"), - optional(Spanned::new("((nest)")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::UnescapedReservedCharacter(e1)), - Err::Failure(Error::UnescapedReservedCharacter(e2)), - Err::Failure(Error::UnescapedReservedCharacter(e3)), - ] => { - 
assert_eq!(*e1, "{"); - assert_eq!(*e2, "{"); - assert_eq!(*e3, "("); + for (input, expected) in + [("({opt)", "{"), ("({n{e}st})", "{"), ("((nest)", "(")] + { + match optional(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::UnescapedReservedCharacter(e)) => { + assert_eq!(*e, expected, "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_unfinished() { - let err = [ - optional(Spanned::new("(")).expect_err("error"), - optional(Spanned::new("(name ")).expect_err("error"), - ]; - - match err { - #[rustfmt::skip] - [ - Err::Failure(Error::UnfinishedOptional(e1)), - Err::Failure(Error::UnfinishedOptional(e2)) - ] => { - assert_eq!(*e1, "("); - assert_eq!(*e2, "("); + for input in ["(", "(name "] { + match optional(Spanned::new(input)).expect_err("error") { + Err::Failure(Error::UnfinishedOptional(e)) => { + assert_eq!(*e, "(", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } } @@ -1191,38 +1100,24 @@ mod spec { #[allow(clippy::non_ascii_literal)] #[test] fn text() { - match ( - unwrap_parser(alternative(Spanned::new("string"))), - unwrap_parser(alternative(Spanned::new("🦀"))), - ) { - (Alternative::Text(t1), Alternative::Text(t2)) => { - assert_eq!(*t1, "string"); - assert_eq!(*t2, "🦀"); - } - _ => { - panic!("expected Alternative::Text") + for input in ["string", "🦀"] { + match unwrap_parser(alternative(Spanned::new(input))) { + Alternative::Text(t) => { + assert_eq!(*t, input, "on input: {}", input); + } + _ => panic!("expected Alternative::Text"), } } } #[test] fn escaped_spaces() { - match ( - unwrap_parser(alternative(Spanned::new("bef\\ "))), - unwrap_parser(alternative(Spanned::new("\\ aft"))), - unwrap_parser(alternative(Spanned::new("bef\\ aft"))), - ) { - ( - Alternative::Text(t1), - Alternative::Text(t2), - Alternative::Text(t3), - ) => { - assert_eq!(*t1, "bef\\ "); - assert_eq!(*t2, "\\ aft"); - 
assert_eq!(*t3, "bef\\ aft"); - } - _ => { - panic!("expected Alternative::Text") + for input in ["bef\\ ", "\\ aft", "bef\\ aft"] { + match unwrap_parser(alternative(Spanned::new(input))) { + Alternative::Text(t) => { + assert_eq!(*t, input, "on input: {}", input); + } + _ => panic!("expected Alternative::Text"), } } } @@ -1234,7 +1129,7 @@ mod spec { assert_eq!(**t, "opt"); } Alternative::Text(_) => { - panic!("expected Alternative::Optional") + panic!("expected Alternative::Optional"); } } } @@ -1248,7 +1143,7 @@ mod spec { match matched { Alternative::Text(t) => assert_eq!(*t, "text"), Alternative::Optional(_) => { - panic!("expected Alternative::Text") + panic!("expected Alternative::Text"); } } } @@ -1261,65 +1156,47 @@ mod spec { match alternative(Spanned::new("")).unwrap_err() { Err::Error(Error::Other(_, ErrorKind::Alt)) => {} e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } #[test] fn fails_on_unfinished_optional() { - let err = ( - alternative(Spanned::new("(")).unwrap_err(), - alternative(Spanned::new("(opt")).unwrap_err(), - ); - - match err { - ( - Err::Failure(Error::UnfinishedOptional(e1)), - Err::Failure(Error::UnfinishedOptional(e2)), - ) => { - assert_eq!(*e1, "("); - assert_eq!(*e2, "("); + for input in ["(", "(opt"] { + match alternative(Spanned::new(input)).unwrap_err() { + Err::Failure(Error::UnfinishedOptional(e)) => { + assert_eq!(*e, "(", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_escaped_non_reserved() { - let err = ( - alternative(Spanned::new("(\\r)")).unwrap_err(), - alternative(Spanned::new("\\r")).unwrap_err(), - ); - - match err { - ( - Err::Failure(Error::EscapedNonReservedCharacter(e1)), - Err::Failure(Error::EscapedNonReservedCharacter(e2)), - ) => { - assert_eq!(*e1, "\\r"); - assert_eq!(*e2, "\\r"); + for input in ["(\\r)", "\\r"] { + match 
alternative(Spanned::new(input)).unwrap_err() { + Err::Failure(Error::EscapedNonReservedCharacter(e)) => { + assert_eq!(*e, "\\r", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } } mod alternation { use super::{ - alternation, eq, unwrap_parser, Err, Error, ErrorKind, Spanned, + alternation, assert_ast_eq, unwrap_parser, Err, Error, ErrorKind, + Spanned, }; #[allow(clippy::non_ascii_literal)] #[test] fn basic() { - let ast = format!( - "{:#?}", - unwrap_parser(alternation(Spanned::new("l/🦀"))) - ); - - eq( - ast, + assert_ast_eq( + unwrap_parser(alternation(Spanned::new("l/🦀"))), r#"Alternation( [ [ @@ -1349,15 +1226,10 @@ mod spec { #[test] fn with_optionals() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(alternation(Spanned::new( - "l(opt)/(opt)r/l(opt)r" + "l(opt)/(opt)r/l(opt)r", ))), - ); - - eq( - ast, r#"Alternation( [ [ @@ -1433,18 +1305,12 @@ mod spec { ); } - #[allow(clippy::too_many_lines)] #[test] fn with_more_optionals() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(alternation(Spanned::new( - "l(opt)(opt)/(opt)(opt)r/(opt)m(opt)" + "l(opt)(opt)/(opt)(opt)r/(opt)m(opt)", ))), - ); - - eq( - ast, r#"Alternation( [ [ @@ -1544,91 +1410,59 @@ mod spec { #[test] fn errors_without_slash() { - match ( - alternation(Spanned::new("")).unwrap_err(), - alternation(Spanned::new("{par}")).unwrap_err(), - alternation(Spanned::new("text")).unwrap_err(), - alternation(Spanned::new("(opt)")).unwrap_err(), - ) { - ( - Err::Error(Error::Other(_, ErrorKind::Many1)), - Err::Error(Error::Other(_, ErrorKind::Many1)), - Err::Error(Error::Other(_, ErrorKind::Tag)), - Err::Error(Error::Other(_, ErrorKind::Tag)), - ) => {} - _ => panic!("wrong err"), + for (input, expected) in [ + ("", ErrorKind::Many1), + ("{par}", ErrorKind::Many1), + ("text", ErrorKind::Tag), + ("(opt)", ErrorKind::Tag), + ] { + match alternation(Spanned::new(input)).unwrap_err() { + 
Err::Error(Error::Other(_, kind)) => { + assert_eq!(kind, expected, "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), + } } } #[test] fn fails_on_empty_alternation() { - let err = ( - alternation(Spanned::new("/")).unwrap_err(), - alternation(Spanned::new("l/")).unwrap_err(), - alternation(Spanned::new("/r")).unwrap_err(), - alternation(Spanned::new("l/m/")).unwrap_err(), - alternation(Spanned::new("l//r")).unwrap_err(), - alternation(Spanned::new("/m/r")).unwrap_err(), - ); - - match err { - ( - Err::Failure(Error::EmptyAlternation(e1)), - Err::Failure(Error::EmptyAlternation(e2)), - Err::Failure(Error::EmptyAlternation(e3)), - Err::Failure(Error::EmptyAlternation(e4)), - Err::Failure(Error::EmptyAlternation(e5)), - Err::Failure(Error::EmptyAlternation(e6)), - ) => { - assert_eq!(*e1, "/"); - assert_eq!(*e2, "/"); - assert_eq!(*e3, "/"); - assert_eq!(*e4, "/"); - assert_eq!(*e5, "/"); - assert_eq!(*e6, "/"); + for input in ["/", "l/", "/r", "l/m/", "l//r", "/m/r"] { + match alternation(Spanned::new(input)).unwrap_err() { + Err::Failure(Error::EmptyAlternation(e)) => { + assert_eq!(*e, "/", "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } #[test] fn fails_on_only_optional() { - let err = ( - alternation(Spanned::new("text/(opt)")).unwrap_err(), - alternation(Spanned::new("text/(opt)(opt)")).unwrap_err(), - alternation(Spanned::new("(opt)/text")).unwrap_err(), - alternation(Spanned::new("(opt)/(opt)")).unwrap_err(), - ); - - match err { - ( - Err::Failure(Error::OnlyOptionalInAlternation(e1)), - Err::Failure(Error::OnlyOptionalInAlternation(e2)), - Err::Failure(Error::OnlyOptionalInAlternation(e3)), - Err::Failure(Error::OnlyOptionalInAlternation(e4)), - ) => { - assert_eq!(*e1, "text/(opt)"); - assert_eq!(*e2, "text/(opt)(opt)"); - assert_eq!(*e3, "(opt)/text"); - assert_eq!(*e4, "(opt)/(opt)"); + for input in + ["text/(opt)", "text/(opt)(opt)", "(opt)/text", "(opt)/(opt)"] + { + 
match alternation(Spanned::new(input)).unwrap_err() { + Err::Failure(Error::OnlyOptionalInAlternation(e)) => { + assert_eq!(*e, input, "on input: {}", input); + } + e => panic!("wrong error: {:?}", e), } - _ => panic!("wrong error: {:?}", err), } } } - // all test examples from: https://bit.ly/3q6m53v + // All test examples from: + // Naming of test cases is preserved. mod expression { - use super::{eq, expression, unwrap_parser, Err, Error, Spanned}; + use super::{ + assert_ast_eq, expression, unwrap_parser, Err, Error, Spanned, + }; #[test] fn allows_escaped_optional_parameter_types() { - let ast = format!( - "{:#?}", - unwrap_parser(expression(Spanned::new("\\({int})"))) - ); - eq( - ast, + assert_ast_eq( + unwrap_parser(expression(Spanned::new("\\({int})"))), r#"Expression( [ Text( @@ -1664,12 +1498,8 @@ mod spec { #[test] fn allows_parameter_type_in_alternation() { - let ast = format!( - "{:#?}", - unwrap_parser(expression(Spanned::new("a/i{int}n/y"))) - ); - eq( - ast, + assert_ast_eq( + unwrap_parser(expression(Spanned::new("a/i{int}n/y"))), r#"Expression( [ Alternation( @@ -1741,12 +1571,8 @@ mod spec { #[test] fn does_allow_parameter_adjacent_to_alternation() { - let ast = format!( - "{:#?}", - unwrap_parser(expression(Spanned::new("{int}st/nd/rd/th"))) - ); - eq( - ast, + assert_ast_eq( + unwrap_parser(expression(Spanned::new("{int}st/nd/rd/th"))), r#"Expression( [ Parameter( @@ -1819,41 +1645,52 @@ mod spec { assert_eq!(*s, "/"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } #[rustfmt::skip] #[test] - fn does_not_allow_alternation_with_empty_alternative_by_adjacent_left_parameter() { + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_left_parameter( + ) { match expression(Spanned::new("{int}/x")).unwrap_err() { Err::Failure(Error::EmptyAlternation(s)) => { assert_eq!(*s, "/"); } - e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => 
panic!("wrong error: {:?}", e), + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e); + } } } #[rustfmt::skip] #[test] - fn does_not_allow_alternation_with_empty_alternative_by_adjacent_optional() { - match expression(Spanned::new("three (brown)/black mice")).unwrap_err() { + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_optional( + ) { + match expression(Spanned::new("three (brown)/black mice")) + .unwrap_err() + { Err::Failure(Error::OnlyOptionalInAlternation(s)) => { assert_eq!(*s, "(brown)/black"); } - e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => panic!("wrong error: {:?}", e), + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e); + } } } #[rustfmt::skip] #[test] - fn does_not_allow_alternation_with_empty_alternative_by_adjacent_right_parameter() { + fn does_not_allow_alternation_with_empty_alternative_by_adjacent_right_parameter( + ) { match expression(Spanned::new("x/{int}")).unwrap_err() { Err::Failure(Error::EmptyAlternation(s)) => { assert_eq!(*s, "/"); } - e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => panic!("wrong error: {:?}", e), + e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { + panic!("wrong error: {:?}", e); + } } } @@ -1866,7 +1703,7 @@ mod spec { assert_eq!(*s, "/"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1878,7 +1715,7 @@ mod spec { assert_eq!(*s, "()"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1890,7 +1727,7 @@ mod spec { assert_eq!(*s, "(b)"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1902,7 +1739,7 @@ mod spec { assert_eq!(*s, "{int}"); } e @ (Err::Incomplete(_) | Err::Error(_) | 
Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1914,7 +1751,7 @@ mod spec { assert_eq!(*s, "(string)"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1930,7 +1767,7 @@ mod spec { assert_eq!(*s, "{"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1946,7 +1783,7 @@ mod spec { assert_eq!(*s, "{string}"); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } @@ -1962,21 +1799,17 @@ mod spec { assert_eq!(*s, "("); } e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong error: {:?}", e) + panic!("wrong error: {:?}", e); } } } #[test] fn matches_alternation() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(expression(Spanned::new( - "mice/rats and rats\\/mice" - ))) - ); - eq( - ast, + "mice/rats and rats\\/mice", + ))), r#"Expression( [ Alternation( @@ -2044,10 +1877,8 @@ mod spec { #[test] fn matches_anonymous_parameter_type() { - let ast = - format!("{:#?}", unwrap_parser(expression(Spanned::new("{}")))); - eq( - ast, + assert_ast_eq( + unwrap_parser(expression(Spanned::new("{}"))), r#"Expression( [ Parameter( @@ -2067,14 +1898,10 @@ mod spec { #[test] fn matches_doubly_escaped_parenthesis() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(expression(Spanned::new( - "three \\(exceptionally) \\{string} mice" - ))) - ); - eq( - ast, + "three \\(exceptionally) \\{string} mice", + ))), r#"Expression( [ Text( @@ -2140,12 +1967,8 @@ mod spec { #[test] fn matches_doubly_escaped_slash() { - let ast = format!( - "{:#?}", - unwrap_parser(expression(Spanned::new("12\\\\/2020"))) - ); - eq( - ast, + assert_ast_eq( + unwrap_parser(expression(Spanned::new("12\\\\/2020"))), r#"Expression( [ 
Alternation( @@ -2181,14 +2004,10 @@ mod spec { #[test] fn matches_optional_before_alternation() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(expression(Spanned::new( - "three (brown )mice/rats" - ))) - ); - eq( - ast, + "three (brown )mice/rats", + ))), r#"Expression( [ Text( @@ -2250,14 +2069,10 @@ mod spec { #[test] fn matches_optional_in_alternation() { - let ast = format!( - "{:#?}", + assert_ast_eq( unwrap_parser(expression(Spanned::new( - "{int} rat(s)/mouse/mice" - ))) - ); - eq( - ast, + "{int} rat(s)/mouse/mice", + ))), r#"Expression( [ Parameter( @@ -2334,16 +2149,17 @@ mod spec { match expression(Spanned::new("\\")).unwrap_err() { Err::Failure(Error::EscapedEndOfLine(_)) => {} e @ (Err::Incomplete(_) | Err::Error(_) | Err::Failure(_)) => { - panic!("wrong err: {}", e) + panic!("wrong err: {}", e); } } } #[test] fn empty() { - let ast = - format!("{:?}", unwrap_parser(expression(Spanned::new("")))); - eq(ast, r#"Expression([])"#); + assert_ast_eq( + unwrap_parser(expression(Spanned::new(""))), + r#"Expression([],)"#, + ); } } }