move VerboseError to nom-language

Geal · Geal · commit c8c03131a6b1 · 2024-12-08T15:06:53.000+01:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -41,7 +41,7 @@ default-features = false
 [dev-dependencies]
 doc-comment = "0.3"
 proptest = "=1.0.0"
-
+nom-language = { path = "./nom-language" }
 
 [package.metadata.docs.rs]
 features = ["alloc", "std", "docsrs"]
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
@@ -56,3 +56,4 @@ harness = false
 
 [dev-dependencies]
 codspeed-criterion-compat = "2.4.1"
+nom-language = { path = "../nom-language" }
diff --git a/benchmarks/benches/json.rs b/benchmarks/benches/json.rs
@@ -7,13 +7,14 @@ use nom::{
   bytes::{tag, take},
   character::{anychar, char, multispace0, none_of},
   combinator::{map, map_opt, map_res, value, verify},
-  error::{Error, ErrorKind, FromExternalError, ParseError, VerboseError},
+  error::{Error, ErrorKind, FromExternalError, ParseError},
   multi::{fold, separated_list0},
   number::double,
   number::recognize_float,
   sequence::{delimited, preceded, separated_pair},
   Check, Complete, Emit, IResult, Mode, OutputM, Parser,
 };
+use nom_language::error::VerboseError;
 
 use std::{collections::HashMap, marker::PhantomData, num::ParseIntError};
 
diff --git a/examples/json.rs b/examples/json.rs
@@ -5,12 +5,13 @@ use nom::{
   bytes::complete::{escaped, tag, take_while},
   character::complete::{alphanumeric1 as alphanumeric, char, one_of},
   combinator::{cut, map, opt, value},
-  error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
+  error::{context, ContextError, ErrorKind, ParseError},
   multi::separated_list0,
   number::complete::double,
   sequence::{delimited, preceded, separated_pair, terminated},
   Err, IResult, Parser,
 };
+use nom_language::error::{convert_error, VerboseError};
 use std::collections::HashMap;
 use std::str;
 
diff --git a/examples/s_expression.rs b/examples/s_expression.rs
@@ -9,11 +9,12 @@ use nom::{
   bytes::complete::tag,
   character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
   combinator::{cut, map, map_res, opt},
-  error::{context, VerboseError},
+  error::context,
   multi::many,
   sequence::{delimited, preceded, terminated},
   IResult, Parser,
 };
+use nom_language::error::VerboseError;
 
 /// We start by defining the types that define the shape of data that we want.
 /// In this case, we want something tree-like
diff --git a/nom-language/src/error.rs b/nom-language/src/error.rs
@@ -0,0 +1,262 @@
+use std::fmt;
+
+use nom::{
+  error::{ContextError, ErrorKind, FromExternalError, ParseError},
+  ErrorConvert,
+};
+
+/// This error type accumulates errors and their position when backtracking
+/// through a parse tree. With some post processing,
+/// it can be used to display user friendly error messages
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct VerboseError<I> {
+  /// List of errors accumulated by `VerboseError`, containing the affected
+  /// part of input data, and some context
+  pub errors: Vec<(I, VerboseErrorKind)>,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+/// Error context for `VerboseError`
+pub enum VerboseErrorKind {
+  /// Static string added by the `context` function
+  Context(&'static str),
+  /// Indicates which character was expected by the `char` function
+  Char(char),
+  /// Error kind given by various nom parsers
+  Nom(ErrorKind),
+}
+
+impl<I> ParseError<I> for VerboseError<I> {
+  fn from_error_kind(input: I, kind: ErrorKind) -> Self {
+    VerboseError {
+      errors: vec![(input, VerboseErrorKind::Nom(kind))],
+    }
+  }
+
+  fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
+    other.errors.push((input, VerboseErrorKind::Nom(kind)));
+    other
+  }
+
+  fn from_char(input: I, c: char) -> Self {
+    VerboseError {
+      errors: vec![(input, VerboseErrorKind::Char(c))],
+    }
+  }
+}
+
+impl<I> ContextError<I> for VerboseError<I> {
+  fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self {
+    other.errors.push((input, VerboseErrorKind::Context(ctx)));
+    other
+  }
+}
+
+impl<I, E> FromExternalError<I, E> for VerboseError<I> {
+  /// Create a new error from an input position and an external error
+  fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
+    Self::from_error_kind(input, kind)
+  }
+}
+
+impl<I: fmt::Display> fmt::Display for VerboseError<I> {
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    writeln!(f, "Parse error:")?;
+    for (input, error) in &self.errors {
+      match error {
+        VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?,
+        VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?,
+        VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?,
+      }
+    }
+
+    Ok(())
+  }
+}
+
+impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {}
+
+impl From<VerboseError<&[u8]>> for VerboseError<Vec<u8>> {
+  fn from(value: VerboseError<&[u8]>) -> Self {
+    VerboseError {
+      errors: value
+        .errors
+        .into_iter()
+        .map(|(i, e)| (i.to_owned(), e))
+        .collect(),
+    }
+  }
+}
+
+impl From<VerboseError<&str>> for VerboseError<String> {
+  fn from(value: VerboseError<&str>) -> Self {
+    VerboseError {
+      errors: value
+        .errors
+        .into_iter()
+        .map(|(i, e)| (i.to_owned(), e))
+        .collect(),
+    }
+  }
+}
+
+impl<I> ErrorConvert<VerboseError<I>> for VerboseError<(I, usize)> {
+  fn convert(self) -> VerboseError<I> {
+    VerboseError {
+      errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(),
+    }
+  }
+}
+
+impl<I> ErrorConvert<VerboseError<(I, usize)>> for VerboseError<I> {
+  fn convert(self) -> VerboseError<(I, usize)> {
+    VerboseError {
+      errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(),
+    }
+  }
+}
+
+/// Transforms a `VerboseError` into a trace with input position information
+///
+/// The errors contain references to input data that must come from `input`,
+/// because nom calculates byte offsets between them
+pub fn convert_error<I: core::ops::Deref<Target = str>>(input: I, e: VerboseError<I>) -> String {
+  use nom::Offset;
+  use std::fmt::Write;
+
+  let mut result = String::new();
+
+  for (i, (substring, kind)) in e.errors.iter().enumerate() {
+    let offset = input.offset(substring);
+
+    if input.is_empty() {
+      match kind {
+        VerboseErrorKind::Char(c) => {
+          write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
+        }
+        VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
+        VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e),
+      }
+    } else {
+      let prefix = &input.as_bytes()[..offset];
+
+      // Count the number of newlines in the first `offset` bytes of input
+      let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;
+
+      // Find the line that includes the subslice:
+      // Find the *last* newline before the substring starts
+      let line_begin = prefix
+        .iter()
+        .rev()
+        .position(|&b| b == b'\n')
+        .map(|pos| offset - pos)
+        .unwrap_or(0);
+
+      // Find the full line after that newline
+      let line = input[line_begin..]
+        .lines()
+        .next()
+        .unwrap_or(&input[line_begin..])
+        .trim_end();
+
+      // The (1-indexed) column number is the offset of our substring into that line
+      let column_number = line.offset(substring) + 1;
+
+      match kind {
+        VerboseErrorKind::Char(c) => {
+          if let Some(actual) = substring.chars().next() {
+            write!(
+              &mut result,
+              "{i}: at line {line_number}:\n\
+               {line}\n\
+               {caret:>column$}\n\
+               expected '{expected}', found {actual}\n\n",
+              i = i,
+              line_number = line_number,
+              line = line,
+              caret = '^',
+              column = column_number,
+              expected = c,
+              actual = actual,
+            )
+          } else {
+            write!(
+              &mut result,
+              "{i}: at line {line_number}:\n\
+               {line}\n\
+               {caret:>column$}\n\
+               expected '{expected}', got end of input\n\n",
+              i = i,
+              line_number = line_number,
+              line = line,
+              caret = '^',
+              column = column_number,
+              expected = c,
+            )
+          }
+        }
+        VerboseErrorKind::Context(s) => write!(
+          &mut result,
+          "{i}: at line {line_number}, in {context}:\n\
+             {line}\n\
+             {caret:>column$}\n\n",
+          i = i,
+          line_number = line_number,
+          context = s,
+          line = line,
+          caret = '^',
+          column = column_number,
+        ),
+        VerboseErrorKind::Nom(e) => write!(
+          &mut result,
+          "{i}: at line {line_number}, in {nom_err:?}:\n\
+             {line}\n\
+             {caret:>column$}\n\n",
+          i = i,
+          line_number = line_number,
+          nom_err = e,
+          line = line,
+          caret = '^',
+          column = column_number,
+        ),
+      }
+    }
+    // Because `write!` to a `String` is infallible, this `unwrap` is fine.
+    .unwrap();
+  }
+
+  result
+}
+
+#[test]
+fn convert_error_panic() {
+  use nom::character::complete::char;
+  use nom::IResult;
+
+  let input = "";
+
+  let _result: IResult<_, _, VerboseError<&str>> = char('x')(input);
+}
+
+#[test]
+fn issue_1027_convert_error_panic_nonempty() {
+  use nom::character::complete::char;
+  use nom::sequence::pair;
+  use nom::Err;
+  use nom::IResult;
+  use nom::Parser;
+
+  let input = "a";
+
+  let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input);
+  let err = match result.unwrap_err() {
+    Err::Error(e) => e,
+    _ => unreachable!(),
+  };
+
+  let msg = convert_error(input, err);
+  assert_eq!(
+    msg,
+    "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n"
+  );
+}
diff --git a/src/error.rs b/src/error.rs
diff --git a/src/traits.rs b/src/traits.rs
diff --git a/tests/issues.rs b/tests/issues.rs

Original file line number	Diff line number	Diff line change
`@@ -56,3 +56,4 @@ harness = false`
`56`	`56`
`57`	`57`	`[dev-dependencies]`
`58`	`58`	`codspeed-criterion-compat = "2.4.1"`
	`59`	`+nom-language = { path = "../nom-language" }`