diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md index b6073cf14..d9dde91df 100644 --- a/doc/choosing_a_combinator.md +++ b/doc/choosing_a_combinator.md @@ -50,6 +50,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element| +| [separated_list_m_n](https://docs.rs/nom/latest/nom/multi/fn.separated_list_m_n.html) | `separated_list_m_n(2, 3, tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` | Alternately applies the item parser and the separator parser and returns the list of items in a Vec if the number is between m and n (inclusive).| | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. 
The `fold_many1` version must apply the child parser at least one time|
 | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value|
 | [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times|
diff --git a/src/multi/mod.rs b/src/multi/mod.rs
index 2218106b3..f1dea5396 100644
--- a/src/multi/mod.rs
+++ b/src/multi/mod.rs
@@ -562,6 +562,168 @@ where
   }
 }
 
+/// Alternates between two parsers to produce a list of at most `max` elements until [`Err::Error`].
+///
+/// Fails if the element parser does not produce at least `min` elements.
+///
+/// This stops when either parser returns [`Err::Error`] or the number of elements reaches `max`,
+/// and returns the results that were accumulated. To instead chain an error up, see
+/// [`cut`][crate::combinator::cut].
+///
+/// If `min` is greater than `max` no list can satisfy the bounds and the parser returns
+/// [`Err::Failure`].
+///
+/// # Arguments
+/// * `min` The minimum number of elements.
+/// * `max` The maximum number of elements.
+/// * `separator` Parses the separator between list elements.
+/// * `parser` Parses the elements of the list.
+///
+/// ```rust
+/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser};
+/// use nom::multi::separated_list_m_n;
+/// use nom::bytes::complete::tag;
+///
+/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
+///   separated_list_m_n(2, 3, tag("|"), tag("abc")).parse(s)
+/// }
+///
+/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"])));
+/// assert_eq!(parser("abc|abc|def"), Ok(("|def", vec!["abc", "abc"])));
+/// // parsing stops after `max` elements, the rest of the input is left untouched
+/// assert_eq!(parser("abc|abc|abc|abc"), Ok(("|abc", vec!["abc", "abc", "abc"])));
+/// assert_eq!(parser("abc1abc"), Err(Err::Error(Error::new("1abc", ErrorKind::SeparatedList))));
+/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
+/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag))));
+/// ```
+#[cfg(feature = "alloc")]
+#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
+pub fn separated_list_m_n<I, E, F, G>(
+  min: usize,
+  max: usize,
+  separator: G,
+  parser: F,
+) -> impl Parser<I, Output = Vec<<F as Parser<I>>::Output>, Error = E>
+where
+  I: Clone + InputLength,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+  E: ParseError<I>,
+{
+  SeparatedListMN {
+    parser,
+    separator,
+    min,
+    max,
+  }
+}
+
+#[cfg(feature = "alloc")]
+/// Parser implementation for the [separated_list_m_n] combinator
+pub struct SeparatedListMN<F, G> {
+  parser: F,
+  separator: G,
+  min: usize,
+  max: usize,
+}
+
+#[cfg(feature = "alloc")]
+impl<I, E: ParseError<I>, F, G> Parser<I> for SeparatedListMN<F, G>
+where
+  I: Clone + InputLength,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+{
+  type Output = Vec<<F as Parser<I>>::Output>;
+  type Error = <F as Parser<I>>::Error;
+
+  fn process<OM: OutputMode>(
+    &mut self,
+    mut i: I,
+  ) -> crate::PResult<OM, I, Self::Output, Self::Error> {
+    // An empty range can never be satisfied: report the misconfiguration as a
+    // hard failure so that backtracking combinators do not hide it.
+    if self.min > self.max {
+      return Err(Err::Failure(<F as Parser<I>>::Error::from_error_kind(
+        i,
+        ErrorKind::SeparatedList,
+      )));
+    }
+
+    let mut res = OM::Output::bind(crate::lib::std::vec::Vec::new);
+
+    // `max == 0` implies `min == 0` here: the empty list is the only valid result.
+    if self.max == 0 {
+      return Ok((i, res));
+    }
+
+    // Counted outside of the `combine` closures: an output mode that discards
+    // results does not have to run them, which would leave the counter at zero.
+    let mut res_len = 0usize;
+
+    match self.parser.process::<OM>(i.clone()) {
+      // Only a recoverable error may produce the empty list; `Incomplete` and
+      // `Failure` are propagated untouched.
+      Err(Err::Error(e)) => {
+        return if self.min == 0 {
+          Ok((i, res))
+        } else {
+          Err(Err::Error(e))
+        };
+      }
+      Err(e) => return Err(e),
+      Ok((i1, o)) => {
+        res = OM::Output::combine(res, o, |mut res, o| {
+          res.push(o);
+          res
+        });
+        res_len += 1;
+        i = i1;
+      }
+    }
+
+    // Stop asking for separators as soon as `max` elements were collected.
+    while res_len < self.max {
+      let len = i.input_len();
+      match self.separator.process::<OM>(i.clone()) {
+        Err(Err::Error(_)) => break,
+        Err(e) => return Err(e),
+        Ok((i1, _)) => {
+          // infinite loop check: the parser must always consume
+          if i1.input_len() == len {
+            return Err(Err::Error(OM::Error::bind(|| {
+              <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
+            })));
+          }
+
+          match self.parser.process::<OM>(i1) {
+            // `i` still points before the separator, so a separator without a
+            // following element stays in the remaining input.
+            Err(Err::Error(_)) => break,
+            Err(e) => return Err(e),
+            Ok((i2, o)) => {
+              res = OM::Output::combine(res, o, |mut res, o| {
+                res.push(o);
+                res
+              });
+              res_len += 1;
+              i = i2;
+            }
+          }
+        }
+      }
+    }
+
+    if res_len >= self.min {
+      Ok((i, res))
+    } else {
+      Err(Err::Error(OM::Error::bind(|| {
+        <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList)
+      })))
+    }
+  }
+}
+
 /// Repeats the embedded parser `m..=n` times
 ///
 /// This stops before `n` when the parser returns [`Err::Error`] and returns the results that were accumulated. 
To instead chain an error up, see
diff --git a/src/multi/tests.rs b/src/multi/tests.rs
index 9f15ef4c9..41a912c79 100644
--- a/src/multi/tests.rs
+++ b/src/multi/tests.rs
@@ -15,7 +15,7 @@ use crate::{
   lib::std::vec::Vec,
   multi::{
     count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
-    many_till, separated_list0, separated_list1,
+    many_till, separated_list0, separated_list1, separated_list_m_n,
   },
 };
 
@@ -103,6 +103,100 @@ fn separated_list1_test() {
   assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1))));
 }
 
+#[test]
+#[cfg(feature = "alloc")]
+fn separated_list_m_n_test() {
+  // 2 to 4 `abcd` items separated by commas
+  fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(","), tag("abcd")).parse(i)
+  }
+  // items that never consume any input
+  fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(","), tag("")).parse(i)
+  }
+  // separator that never consumes any input
+  fn empty_sep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(""), tag("abc")).parse(i)
+  }
+  // separator longer than one byte
+  fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(2, 4, tag(".."), tag("abcd")).parse(i)
+  }
+  // lower bound of zero
+  fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+    separated_list_m_n(0, 3, tag(","), tag("abc")).parse(i)
+  }
+
+  let no_items = &b"azerty"[..];
+  let less_items = &b"abcdef"[..];
+  let lower_bound = &b"abcd,abcdef"[..];
+  let empty_items = &b",,abc"[..];
+  let trailing_sep = &b"abcd,abcd,ef"[..];
+  let incomplete_less_items = &b"abc"[..];
+  let incomplete_sep = &b"abcd."[..];
+  let incomplete_item = &b"abcd,abc"[..];
+  let not_separated = &b"abcabc"[..];
+
+  // the very first item fails: the item parser's own error is reported
+  let no_items_err_pos = &no_items[0..];
+  assert_eq!(
+    multi(no_items),
+    Err(Err::Error(error_position!(
+      no_items_err_pos,
+      ErrorKind::Tag
+    )))
+  );
+
+  // a single item is below the lower bound
+  let less_items_err_pos = &less_items[4..];
+  assert_eq!(
+    multi(less_items),
+    Err(Err::Error(error_position!(
+      less_items_err_pos,
+      ErrorKind::SeparatedList
+    )))
+  );
+
+  // exactly two items, the unparsed tail is handed back
+  let lower_bound_res = vec![&b"abcd"[..], &b"abcd"[..]];
+  assert_eq!(multi(lower_bound), Ok((&b"ef"[..], lower_bound_res)));
+
+  // empty items are accepted as long as the separator consumes input
+  let empty_items_res = vec![&b""[..], &b""[..], &b""[..]];
+  assert_eq!(multi_empty(empty_items), Ok((&b"abc"[..], empty_items_res)));
+
+  // a separator that consumes nothing is rejected after the first item
+  let not_separated_err_pos = &not_separated[3..];
+  assert_eq!(
+    empty_sep(not_separated),
+    Err(Err::Error(error_position!(
+      not_separated_err_pos,
+      ErrorKind::SeparatedList
+    )))
+  );
+
+  // a separator that is not followed by an item stays in the remaining input
+  let trailing_sep_res = vec![&b"abcd"[..], &b"abcd"[..]];
+  assert_eq!(multi(trailing_sep), Ok((&b",ef"[..], trailing_sep_res)));
+
+  // `Incomplete` is expected for a truncated first item, separator or later item
+  assert_eq!(
+    multi(incomplete_less_items),
+    Err(Err::Incomplete(Needed::new(1)))
+  );
+
+  assert_eq!(
+    multi_longsep(incomplete_sep),
+    Err(Err::Incomplete(Needed::new(1)))
+  );
+
+  assert_eq!(multi(incomplete_item), Err(Err::Incomplete(Needed::new(1))));
+
+  // with a lower bound of zero, finding no item at all is a success
+  let no_items0_res = vec![];
+  assert_eq!(multi0(no_items), Ok((&no_items[0..], no_items0_res)));
+}
+
 #[test]
 #[cfg(feature = "alloc")]
 fn many0_test() {