Skip to content

Add special naming for every regex capture group in Parameter (#7) #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@ All user visible changes to `cucumber-expressions` crate will be documented in t



## [0.2.0] · 2022-??-??
[0.2.0]: /../../tree/v0.2.0

[Diff](/../../compare/v0.1.2...v0.2.0) | [Milestone](/../../milestone/4)

### BC Breaks

- Added `id` field to `Parameter` AST struct. ([#8], [#7])

### Added

- Support for capturing groups inside `Parameter` regex. ([#8], [#7])

[#7]: /../../issues/7
[#8]: /../../pull/8




## [0.1.2] · 2022-01-11
[0.1.2]: /../../tree/v0.1.2

Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"]

[features]
# Enables ability to expand AST into regex.
into-regex = ["either", "regex"]
into-regex = ["either", "regex", "regex-syntax"]

[dependencies]
derive_more = { version = "0.99.17", features = ["as_ref", "deref", "deref_mut", "display", "error", "from", "into"], default_features = false }
Expand All @@ -33,6 +33,7 @@ nom_locate = "4.0"
# "into-regex" feature dependencies
either = { version = "1.6", optional = true }
regex = { version = "1.5", optional = true }
regex-syntax = { version = "0.6", optional = true }

[workspace]
members = ["fuzz"]
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/parameter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ use cucumber_expressions::parse;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &str| {
let _ = parse::parameter(data);
let _ = parse::parameter(data, &mut 0);
});
11 changes: 10 additions & 1 deletion src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,13 @@ pub struct Optional<Input>(pub Input);
///
/// [0]: crate#grammar
#[derive(AsRef, Clone, Copy, Debug, Deref, DerefMut, Eq, PartialEq)]
pub struct Parameter<Input>(pub Input);
pub struct Parameter<Input> {
/// Inner `Input`.
#[deref]
#[deref_mut]
pub input: Input,

/// Unique ID of this [`Parameter`] in the parsed [`Expression`].
#[as_ref(ignore)]
pub id: usize,
}
160 changes: 123 additions & 37 deletions src/expand/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,23 +150,44 @@ where
Parsing(parse::Error<Input>),

/// Expansion error.
#[display(fmt = "Regex expansion failed: {}", _0)]
Expansion(UnknownParameterError<Input>),
#[display(fmt = "Failed to expand regex: {}", _0)]
Expansion(ParameterError<Input>),

/// [`Regex`] creation error.
#[display(fmt = "Regex creation failed: {}", _0)]
Regex(regex::Error),
}

/// Error of an unknown [`Parameter`] being used in an [`Expression`].
#[derive(Clone, Copy, Debug, Display, Error)]
#[display(fmt = "Parameter '{}' not found.", not_found)]
pub struct UnknownParameterError<Input>
/// Possible errors of a [`Parameter`] being used in an [`Expression`].
#[derive(Clone, Debug, Display, Error)]
pub enum ParameterError<Input>
where
Input: fmt::Display,
{
/// [`Parameter`] not found.
pub not_found: Input,
#[display(fmt = "Parameter `{}` not found.", _0)]
NotFound(Input),

/// Failed to rename [`Regex`] capturing group.
#[display(
fmt = "Failed to rename capturing groups in regex `{}` of \
parameter `{}`: {}",
re,
parameter,
err
)]
RenameRegexGroup {
/// [`Parameter`] name.
parameter: Input,

/// [`Regex`] of the [`Parameter`].
re: String,

/// [`Error`] of parsing the [`Regex`] with renamed capturing groups.
///
/// [`Error`]: regex_syntax::Error
err: regex_syntax::Error,
},
}

/// Expansion of a [Cucumber Expressions][0] [AST] element into a [`Regex`] by
Expand All @@ -177,7 +198,7 @@ where
/// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
pub trait IntoRegexCharIter<Input: fmt::Display> {
/// Type of an [`Iterator`] performing the expansion.
type Iter: Iterator<Item = Result<char, UnknownParameterError<Input>>>;
type Iter: Iterator<Item = Result<char, ParameterError<Input>>>;

/// Consumes this [AST] element returning an [`Iterator`] over [`char`]s
/// transformable into a [`Regex`].
Expand Down Expand Up @@ -208,7 +229,7 @@ where
/// [`IntoRegexCharIter::Iter`] for an [`Expression`].
type ExpressionIter<Input> = iter::Chain<
iter::Chain<
iter::Once<Result<char, UnknownParameterError<Input>>>,
iter::Once<Result<char, ParameterError<Input>>>,
iter::FlatMap<
vec::IntoIter<SingleExpression<Input>>,
<SingleExpression<Input> as IntoRegexCharIter<Input>>::Iter,
Expand All @@ -218,7 +239,7 @@ type ExpressionIter<Input> = iter::Chain<
-> <SingleExpression<Input> as IntoRegexCharIter<Input>>::Iter,
>,
>,
iter::Once<Result<char, UnknownParameterError<Input>>>,
iter::Once<Result<char, ParameterError<Input>>>,
>;

impl<Input> IntoRegexCharIter<Input> for SingleExpression<Input>
Expand Down Expand Up @@ -307,7 +328,7 @@ type AlternationIter<I> = iter::Chain<
>,
>,
>,
iter::Once<Result<char, UnknownParameterError<I>>>,
iter::Once<Result<char, ParameterError<I>>>,
>;

// TODO: Replace with TAIT, once stabilized:
Expand All @@ -319,7 +340,7 @@ type AlternationIterInner<I> = iter::Chain<
<Alternative<I> as IntoRegexCharIter<I>>::Iter,
fn(Alternative<I>) -> <Alternative<I> as IntoRegexCharIter<I>>::Iter,
>,
iter::Once<Result<char, UnknownParameterError<I>>>,
iter::Once<Result<char, ParameterError<I>>>,
>;

impl<Input> IntoRegexCharIter<Input> for Alternative<Input>
Expand Down Expand Up @@ -397,7 +418,7 @@ type OptionalIter<Input> = iter::Map<
>;

/// Function pointer describing [`Ok`].
type MapOkChar<Input> = fn(char) -> Result<char, UnknownParameterError<Input>>;
type MapOkChar<Input> = fn(char) -> Result<char, ParameterError<Input>>;

impl<Input> IntoRegexCharIter<Input> for Parameter<Input>
where
Expand All @@ -413,36 +434,41 @@ where
i.iter_elements().map(AsChar::as_char).eq(str.chars())
};

if eq(&self.0, "int") {
Left(r#"((?:-?\d+)|(?:\d+))"#.chars().map(Ok))
} else if eq(&self.0, "float") {
if eq(&self.input, "int") {
Left(Left(r#"((?:-?\d+)|(?:\d+))"#.chars().map(Ok)))
} else if eq(&self.input, "float") {
// Regex in other implementations has lookaheads. As `regex` crate
// doesn't support them, we use `f32`/`f64` grammar instead:
// https://doc.rust-lang.org/stable/std/primitive.f64.html#grammar
// Provided grammar is a superset of the original one:
// - supports `e` as exponent in addition to `E`
// - supports trailing decimal point: `1.`
// - supports `inf` and `NaN`
Left(
Left(Left(
"([+-]?(?:inf\
|NaN\
|(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
))"
.chars()
.map(Ok),
)
} else if eq(&self.0, "word") {
Left(r#"([^\s]+)"#.chars().map(Ok))
} else if eq(&self.0, "string") {
Left(
r#"("(?:[^"\\]*(?:\\.[^"\\]*)*)"|'(?:[^'\\]*(?:\\.[^'\\]*)*)')"#
.chars()
.map(Ok),
)
} else if eq(&self.0, "") {
Left(r#"(.*)"#.chars().map(Ok))
))
} else if eq(&self.input, "word") {
Left(Left(r#"([^\s]+)"#.chars().map(Ok)))
} else if eq(&self.input, "string") {
Left(Right(
OwnedChars::new(format!(
"(?:\
\"(?P<__{id}_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
|'(?P<__{id}_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
)",
id = self.id,
))
.map(Ok),
))
} else if eq(&self.input, "") {
Left(Left(r#"(.*)"#.chars().map(Ok)))
} else {
Right(iter::once(Err(UnknownParameterError { not_found: self.0 })))
Right(iter::once(Err(ParameterError::NotFound(self.input))))
}
}
}
Expand All @@ -451,11 +477,14 @@ where
// https://github.com/rust-lang/rust/issues/63063
/// [`IntoRegexCharIter::Iter`] for a [`Parameter`].
type ParameterIter<Input> = Either<
iter::Map<
str::Chars<'static>,
fn(char) -> Result<char, UnknownParameterError<Input>>,
Either<
iter::Map<
str::Chars<'static>,
fn(char) -> Result<char, ParameterError<Input>>,
>,
iter::Map<OwnedChars, fn(char) -> Result<char, ParameterError<Input>>>,
>,
iter::Once<Result<char, UnknownParameterError<Input>>>,
iter::Once<Result<char, ParameterError<Input>>>,
>;

/// [`Iterator`] for skipping a last [`Item`].
Expand Down Expand Up @@ -513,6 +542,36 @@ where
}
}

// TODO: Make private, once TAIT stabilized:
//       https://github.com/rust-lang/rust/issues/63063
/// Like [`str::Chars`] [`Iterator`], but owns its [`String`].
///
/// Yields every [`char`] of the owned [`String`] exactly once, in order, and
/// keeps returning [`None`] after the end has been reached.
#[derive(Clone, Debug)]
pub struct OwnedChars {
    /// Iterated [`String`].
    str: String,

    /// Byte offset inside `str` of the next [`char`] to be yielded.
    ///
    /// Starts at `0` and is only ever advanced by the UTF-8 length of a
    /// [`char`] that was just decoded at this offset, so it always lies on a
    /// [`char`] boundary and never exceeds `str.len()`.
    cur: usize,
}

impl OwnedChars {
    /// Creates a new [`OwnedChars`] [`Iterator`] positioned at the start of
    /// the provided [`String`].
    #[must_use]
    pub const fn new(str: String) -> Self {
        Self { str, cur: 0 }
    }
}

impl Iterator for OwnedChars {
    type Item = char;

    /// Returns the next [`char`] of the owned [`String`], or [`None`] once all
    /// of them have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // Decode only the `char` at the current byte offset, instead of
        // re-walking the string from its start on every call (which made a
        // full iteration quadratic in the string length).
        let ch = self.str.get(self.cur..)?.chars().next()?;
        self.cur += ch.len_utf8();
        Some(ch)
    }

    /// Bounds the number of remaining [`char`]s by the number of remaining
    /// bytes: every [`char`] occupies between 1 and 4 bytes in UTF-8.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.str.len().saturating_sub(self.cur);
        ((remaining + 3) / 4, Some(remaining))
    }
}

/// [`Iterator`] for escaping `^`, `$`, `[`, `]`, `(`, `)`, `{`, `}`, `.`, `|`,
/// `?`, `*`, `+` with `\`, and removing it for other [`char`]s.
///
Expand Down Expand Up @@ -586,7 +645,7 @@ where
// Naming of test cases is preserved.
#[cfg(test)]
mod spec {
use super::{Error, Expression, UnknownParameterError};
use super::{Error, Expression, ParameterError};

#[test]
fn alternation_with_optional() {
Expand Down Expand Up @@ -699,7 +758,10 @@ mod spec {

assert_eq!(
expr.as_str(),
r#"^("(?:[^"\\]*(?:\\.[^"\\]*)*)"|'(?:[^'\\]*(?:\\.[^'\\]*)*)')$"#,
"^(?:\
\"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
|'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
)$",
);
assert!(expr.is_match("\"\""));
assert!(expr.is_match("''"));
Expand All @@ -710,6 +772,30 @@ mod spec {
assert!(!expr.is_match("word"));
}

/// Checks that two `{string}` parameters in one expression expand into two
/// alternations whose capturing groups are named per parameter (`__0_*` for
/// the first, `__1_*` for the second), and that the resulting regex matches
/// inputs mixing double-quoted and single-quoted strings in either order.
#[test]
fn multiple_string_parameters() {
    // TODO: Use "{e}" syntax once MSRV bumps above 1.58.
    let expr = match Expression::regex("{string} {string}") {
        Ok(re) => re,
        Err(e) => panic!("failed: {}", e),
    };

    // Each parameter contributes one non-capturing alternation with a named
    // group per quoting style.
    let expected = "^(?:\
            \"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
            |'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
        ) (?:\
            \"(?P<__1_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
            |'(?P<__1_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
        )$";
    assert_eq!(expr.as_str(), expected);

    // Inputs that must be accepted, checked in the original order.
    let accepted = [
        "\"\" ''",
        "'' \"\"",
        "'with \"' \"\"",
        "\"with '\" '\"'",
        "\"with \\\" escaped\" 'with \\' escaped'",
        "'with \\' escaped' \"with \\\" escaped\"",
    ];
    for &input in &accepted {
        assert!(expr.is_match(input));
    }
}

#[test]
fn parameter_all() {
// TODO: Use "{e}" syntax once MSRV bumps above 1.58.
Expand Down Expand Up @@ -747,10 +833,10 @@ mod spec {
#[test]
fn unknown_parameter() {
match Expression::regex("{custom}").unwrap_err() {
Error::Expansion(UnknownParameterError { not_found }) => {
Error::Expansion(ParameterError::NotFound(not_found)) => {
assert_eq!(*not_found, "custom");
}
e @ (Error::Parsing(_) | Error::Regex(_)) => {
e @ (Error::Parsing(_) | Error::Regex(_) | Error::Expansion(_)) => {
// TODO: Use "{e}" syntax once MSRV bumps above 1.58.
panic!("wrong err: {}", e);
}
Expand Down
Loading