diff --git a/src/error.rs b/src/error/mod.rs similarity index 86% rename from src/error.rs rename to src/error/mod.rs index fd7d9469..3e200df3 100644 --- a/src/error.rs +++ b/src/error/mod.rs @@ -26,10 +26,14 @@ use crate::lib::std::fmt; use core::num::NonZeroUsize; use crate::stream::AsBStr; +use crate::stream::AsOrd; use crate::stream::Stream; #[allow(unused_imports)] // Here for intra-doc links use crate::Parser; +#[cfg(test)] +mod tests; + /// For use with [`Parser::parse_peek`] which allows the input stream to be threaded through a /// parser. /// @@ -227,6 +231,21 @@ impl> AddContext for ErrMode { } } +impl MergeContext for ErrMode { + #[inline(always)] + fn merge_context(self, other: Self) -> Self { + match other.into_inner() { + Some(other) => self.map(|err| err.merge_context(other)), + None => self, + } + } + + #[inline] + fn clear_context(self) -> Self { + self.map(MergeContext::clear_context) + } +} + impl ErrMode> { /// Maps `ErrMode>` to `ErrMode>` with the given `F: T -> U` pub fn map_input(self, f: F) -> ErrMode> @@ -313,6 +332,15 @@ pub trait AddContext: Sized { } } +/// Merge contexts while backtracking. +pub trait MergeContext: Sized { + /// Apply the context from `other` into `self` + fn merge_context(self, _other: Self) -> Self; + + /// Remove all context + fn clear_context(self) -> Self; +} + /// Create a new error with an external error, from [`std::str::FromStr`] /// /// This trait is required by the [`Parser::try_map`] combinator. @@ -536,6 +564,32 @@ impl AddContext for ContextError { } } +impl MergeContext for ContextError { + #[inline] + fn merge_context(mut self, other: Self) -> Self { + // self and other get consumed to produce the new Context error. + // We choose the vector with the larger capacity to reduce the chance of reallocations. + #[cfg(feature = "alloc")] + { + let (mut context, other) = if self.context.capacity() >= other.context.capacity() { + (self.context, other.context) + } else { + (other.context, self.context) + }; + context.extend(other); + self.context = context; + } + self + } + + #[inline] + fn clear_context(mut self) -> Self { + #[cfg(feature = "alloc")] + self.context.clear(); + self + } +} + #[cfg(feature = "std")] impl FromExternalError for ContextError @@ -695,6 +749,194 @@ impl crate::lib::std::fmt::Display for StrContextValue { } } +/// Collect context of the longest matching parser while backtracking on errors +#[derive(Clone, Debug)] +pub struct LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: MergeContext, +{ + checkpoint: Option<::Checkpoint>, + inner: E, +} + +impl LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: MergeContext + Default, +{ + /// Create an empty error + pub fn new() -> Self { + Self { + checkpoint: None, + inner: Default::default(), + } + } +} + +// For tests +impl core::cmp::PartialEq for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd + core::cmp::PartialEq, + E: MergeContext + core::cmp::PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + self.checkpoint == other.checkpoint && self.inner == other.inner + } +} + +impl LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: MergeContext, +{ + /// Extract the error for the longest matching parser + #[inline] + pub fn into_inner(self) -> E { + self.inner + } + + #[inline] + fn cmp_checkpoints(&self, other: &Self) -> core::cmp::Ordering { + let a = self.checkpoint.as_ref().map(|c| c.as_ord()); + let b = other.checkpoint.as_ref().map(|c| c.as_ord()); + a.cmp(&b) + } + + #[inline] + fn cmp_with_checkpoint(&self, other: &::Checkpoint) -> core::cmp::Ordering { + match self.checkpoint { + None => core::cmp::Ordering::Less, + Some(ref c) => c.as_ord().cmp(&other.as_ord()), + } + } +} + +impl Default for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: MergeContext + Default, +{ + fn default() -> Self { + Self::new() + } +} + +impl ParserError for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: ParserError + MergeContext, +{ + #[inline] + fn from_error_kind(input: &I, kind: ErrorKind) -> Self { + Self { + checkpoint: Some(input.checkpoint()), + inner: E::from_error_kind(input, kind), + } + } + + #[inline] + fn append(mut self, input: &I, kind: ErrorKind) -> Self { + let checkpoint = input.checkpoint(); + match self.cmp_with_checkpoint(&checkpoint) { + core::cmp::Ordering::Less => { + self.checkpoint = Some(checkpoint); + self.inner = self.inner.clear_context().append(input, kind); + } + core::cmp::Ordering::Equal => { + self.inner = self.inner.append(input, kind); + } + core::cmp::Ordering::Greater => {} + } + self + } + + #[inline] + fn or(self, other: Self) -> Self { + self.merge_context(other) + } +} + +impl AddContext for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: AddContext + MergeContext, +{ + #[inline] + fn add_context(mut self, input: &I, ctx: C) -> Self { + let checkpoint = input.checkpoint(); + match self.cmp_with_checkpoint(&checkpoint) { + core::cmp::Ordering::Less => { + self.checkpoint = Some(checkpoint); + self.inner = self.inner.clear_context().add_context(input, ctx); + } + core::cmp::Ordering::Equal => { + self.inner = self.inner.add_context(input, ctx); + } + core::cmp::Ordering::Greater => {} + } + self + } +} + +impl MergeContext for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, +{ + #[inline] + fn merge_context(mut self, other: Self) -> Self { + match self.cmp_checkpoints(&other) { + core::cmp::Ordering::Less => other, + core::cmp::Ordering::Greater => self, + core::cmp::Ordering::Equal => { + self.inner = self.inner.merge_context(other.inner); + self + } + } + } + + fn clear_context(self) -> Self { + Self { + checkpoint: None, + inner: self.inner.clear_context(), + } + } +} + +impl FromExternalError for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: FromExternalError + MergeContext, +{ + #[inline] + fn from_external_error(input: &I, kind: ErrorKind, e: EX) -> Self { + Self { + checkpoint: Some(input.checkpoint()), + inner: E::from_external_error(input, kind, e), + } + } +} + +impl crate::lib::std::fmt::Display for LongestMatch +where + I: Stream, + ::Checkpoint: AsOrd, + E: crate::lib::std::fmt::Display + MergeContext, +{ + fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result { + self.inner.fmt(f) + } +} + /// Trace all error paths, particularly for tests #[derive(Debug)] #[cfg(feature = "std")] diff --git a/src/error/tests.rs b/src/error/tests.rs new file mode 100644 index 00000000..683fc535 --- /dev/null +++ b/src/error/tests.rs @@ -0,0 +1,190 @@ +use super::*; + +mod longest_match { + use super::*; + use crate::combinator::{alt, eof}; + + type Error<'a> = LongestMatch<&'a str, ContextError<&'static str>>; + + fn pattern<'a, O>( + p: impl Parser<&'a str, O, Error<'a>>, + label: &'static str, + ) -> impl Parser<&'a str, &'a str, Error<'a>> { + (p, eof).recognize().context(label) + } + + #[test] + fn parser_error_implementation() { + let input = "abcd"; + let checkpoint1 = &&input[2..]; + let checkpoint2 = &&input[3..]; + + assert_eq!( + Error::new() + .append(checkpoint1, ErrorKind::Token) + .append(checkpoint2, ErrorKind::Tag), + Error::from_error_kind(checkpoint2, ErrorKind::Tag), + ); + + assert_eq!( + Error::new() + .append(checkpoint2, ErrorKind::Tag) + .append(checkpoint1, ErrorKind::Token), + Error::from_error_kind(checkpoint2, ErrorKind::Tag), + ); + + assert_eq!( + Error::new() + .append(checkpoint1, ErrorKind::Token) + .append(checkpoint1, ErrorKind::Tag), + Error::from_error_kind(checkpoint1, ErrorKind::Token), + ); + + assert_eq!( + Error::from_error_kind(checkpoint1, ErrorKind::Token) + .or(Error::from_error_kind(checkpoint2, ErrorKind::Tag)), + Error::from_error_kind(checkpoint2, ErrorKind::Tag), + ); + + assert_eq!( + Error::from_error_kind(checkpoint2, ErrorKind::Tag) + .or(Error::from_error_kind(checkpoint1, ErrorKind::Token)), + Error::from_error_kind(checkpoint2, ErrorKind::Tag), + ); + + assert_eq!( + Error::from_error_kind(checkpoint1, ErrorKind::Token) + .or(Error::from_error_kind(checkpoint1, ErrorKind::Tag)), + Error::from_error_kind(checkpoint1, ErrorKind::Token), + ); + } + + #[test] + fn add_context() { + let input = "abcd"; + let checkpoint1 = &&input[2..]; + let checkpoint2 = &&input[3..]; + + assert_eq!( + Error::new() + .add_context(checkpoint1, "don't want") + .add_context(checkpoint2, "want"), + Error::new().add_context(checkpoint2, "want"), + ); + + assert_eq!( + Error::new() + .add_context(checkpoint2, "want") + .add_context(checkpoint1, "don't want"), + Error::new().add_context(checkpoint2, "want"), + ); + + assert_eq!( + Error::new() + .add_context(checkpoint1, "want") + .add_context(checkpoint1, "also want") + .into_inner(), + ContextError::new() + .add_context(checkpoint1, "want") + .add_context(checkpoint1, "also want"), + ); + } + + #[test] + fn merge_context() { + let input = "abcd"; + let checkpoint1 = &&input[2..]; + let checkpoint2 = &&input[3..]; + + assert_eq!( + Error::new() + .add_context(checkpoint1, "don't want") + .clear_context(), + Error::new(), + ); + + assert_eq!( + Error::new() + .add_context(checkpoint1, "don't want") + .merge_context(Error::new().add_context(checkpoint2, "want")), + Error::new().add_context(checkpoint2, "want"), + ); + + assert_eq!( + Error::new() + .add_context(checkpoint2, "want") + .merge_context(Error::new().add_context(checkpoint1, "don't want")), + Error::new().add_context(checkpoint2, "want"), + ); + + assert_eq!( + Error::new() + .add_context(checkpoint1, "want") + .merge_context(Error::new().add_context(checkpoint1, "also want")) + .into_inner(), + ContextError::new() + .add_context(checkpoint1, "want") + .add_context(checkpoint1, "also want"), + ); + } + + #[test] + fn single_longest_match_first_in_alt() { + let mut parser = alt(( + pattern(('a', 'b', 'c', 'd'), "wanted"), + pattern(('a', 'b', 'c'), "don't want 1"), + pattern(('a', 'b'), "don't want 2"), + )); + + let mut input = "abcde"; + let checkpoint = &&input[4..]; // 4 characters consumed by longest match + assert_eq!( + parser.parse_next(&mut input), + Err(ErrMode::Backtrack( + LongestMatch::new().add_context(checkpoint, "wanted"), + )) + ); + } + + #[test] + fn multi_longest_match() { + let mut parser = alt(( + pattern(('d', 'e', 'f'), "don't want"), + pattern(('a', 'b', 'c', 'd'), "wanted 1"), + pattern(('a', 'b', 'c'), "wanted 2"), + pattern(('d', 'e', 'f', 'g'), "don't want"), + )); + + let mut input = "abd"; + let checkpoint = &&input[2..]; // 2 characters consumed by longest match + assert_eq!( + parser.parse_next(&mut input), + Err(ErrMode::Backtrack( + LongestMatch::new() + .add_context(checkpoint, "wanted 1") + .add_context(checkpoint, "wanted 2"), + )) + ); + } + + #[test] + fn multi_longest_match_input_short() { + let mut parser = alt(( + pattern(('d', 'e', 'f'), "don't want"), + pattern(('a', 'b', 'c', 'd'), "wanted 1"), + pattern(('a', 'b', 'c'), "wanted 2"), + pattern(('d', 'e', 'f', 'g'), "don't want"), + )); + + let mut input = "ab"; + let checkpoint = &&input[2..]; // 2 characters consumed by longest match + assert_eq!( + parser.parse_next(&mut input), + Err(ErrMode::Backtrack( + LongestMatch::new() + .add_context(checkpoint, "wanted 1") + .add_context(checkpoint, "wanted 2"), + )) + ); + } +} diff --git a/src/stream/mod.rs b/src/stream/mod.rs index 39a1e0d4..d8053e7e 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -1466,6 +1466,31 @@ where } } +/// Used to compare checkpoints +pub trait AsOrd { + /// The type used to compare checkpoint positions + type Ord: Ord + Clone + core::cmp::Ord + crate::lib::std::fmt::Debug; + + /// Get comparable value + fn as_ord(&self) -> Self::Ord; +} + +impl<'a, T> AsOrd for &'a [T] { + type Ord = *const T; + + fn as_ord(&self) -> Self::Ord { + self.as_ptr() + } +} + +impl<'a> AsOrd for &'a str { + type Ord = *const u8; + + fn as_ord(&self) -> Self::Ord { + self.as_ptr() + } +} + /// Helper trait for types that can be viewed as a byte slice pub trait AsBytes { /// Casts the input type to a byte slice @@ -2200,6 +2225,22 @@ where #[derive(Copy, Clone, Debug)] pub struct Checkpoint(T); +impl PartialEq for Checkpoint { + fn eq(&self, other: &Self) -> bool { + self.0.eq(&other.0) + } +} + +impl Eq for Checkpoint {} + +impl AsOrd for Checkpoint { + type Ord = T::Ord; + + fn as_ord(&self) -> Self::Ord { + self.0.as_ord() + } +} + /// A range bounded inclusively for counting parses performed #[derive(PartialEq, Eq)] pub struct Range {