diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e9850b522..8501bc451d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ New features: Bug fixes: +* [BUG #594](https://github.com/rust-lang/regex/pull/594): + Improve error reporting when writing `\p\`. * [BUG #633](https://github.com/rust-lang/regex/pull/633): Squash deprecation warnings for the `std::error::Error::description` method. diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 35fd051f18..7179f2d403 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -156,6 +156,9 @@ pub enum ErrorKind { /// `(?i)*`. It is, however, possible to create a repetition operating on /// an empty sub-expression. For example, `()*` is still considered valid. RepetitionMissing, + /// The Unicode class is not valid. This typically occurs when a `\p` is + /// followed by a `\` instead of a class name, as in `\p\{`. + UnicodeClassInvalid, /// When octal support is disabled, this error is produced when an octal /// escape is used. The octal escape is assumed to be an invocation of /// a backreference, which is the common case. 
@@ -208,6 +211,7 @@ impl error::Error for Error { RepetitionCountInvalid => "invalid repetition count range", RepetitionCountUnclosed => "unclosed counted repetition", RepetitionMissing => "repetition operator missing expression", + UnicodeClassInvalid => "invalid Unicode character class", UnsupportedBackreference => "backreferences are not supported", UnsupportedLookAround => "look-around is not supported", _ => unreachable!(), @@ -295,6 +299,9 @@ impl fmt::Display for ErrorKind { RepetitionMissing => { write!(f, "repetition operator missing expression") } + UnicodeClassInvalid => { + write!(f, "invalid Unicode character class") + } UnsupportedBackreference => { write!(f, "backreferences are not supported") } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index c063ea9dc2..f5b4548b23 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -2095,6 +2095,12 @@ impl<'s, P: Borrow> ParserI<'s, P> { } else { let start = self.pos(); let c = self.char(); + if c == '\\' { + return Err(self.error( + self.span_char(), + ast::ErrorKind::UnicodeClassInvalid, + )); + } self.bump_and_bump_space(); let kind = ast::ClassUnicodeKind::OneLetter(c); (start, kind) @@ -5713,6 +5719,20 @@ bar ], })) ); + assert_eq!( + parser(r"\p\{").parse().unwrap_err(), + TestError { + span: span(2..3), + kind: ast::ErrorKind::UnicodeClassInvalid, + } + ); + assert_eq!( + parser(r"\P\{").parse().unwrap_err(), + TestError { + span: span(2..3), + kind: ast::ErrorKind::UnicodeClassInvalid, + } + ); } #[test]