From 64a4dee4956e9d362eb071bfbd5ed8ad9a608f5c Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 8 May 2020 08:53:41 -0400 Subject: [PATCH] cli: improve invalid UTF-8 pattern error message When a pattern with invalid UTF-8 is given, the error message suggests unqualified use of hex escape sequences to match arbitrary bytes. But you *also* need to disable Unicode mode. So include that in the error message. Fixes #1339 --- CHANGELOG.md | 2 ++ crates/cli/src/pattern.rs | 11 ++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c8c7b3b9..4ce6ae7d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ TBD === Bug fixes: +* [BUG #1339](https://github.com/BurntSushi/ripgrep/issues/1339): + Improve error message when a pattern with invalid UTF-8 is provided. * [BUG #1524](https://github.com/BurntSushi/ripgrep/issues/1524): Note how to escape a `$` when using `--replace`. * [BUG #1537](https://github.com/BurntSushi/ripgrep/issues/1537): diff --git a/crates/cli/src/pattern.rs b/crates/cli/src/pattern.rs index 8341e4daf..11e4a8b4c 100644 --- a/crates/cli/src/pattern.rs +++ b/crates/cli/src/pattern.rs @@ -38,9 +38,9 @@ impl fmt::Display for InvalidPatternError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - "found invalid UTF-8 in pattern at byte offset {} \ - (use hex escape sequences to match arbitrary bytes \ - in a pattern, e.g., \\xFF): '{}'", + "found invalid UTF-8 in pattern at byte offset {}: {} \ + (disable Unicode mode and use hex escape sequences to match \ + arbitrary bytes in a pattern, e.g., '(?-u)\\xFF')", self.valid_up_to, self.original, ) } @@ -64,10 +64,7 @@ pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> { .to_string_lossy() .find('\u{FFFD}') .expect("a Unicode replacement codepoint for invalid UTF-8"); - InvalidPatternError { - original: escape_os(pattern), - valid_up_to: valid_up_to, - } + InvalidPatternError { original: escape_os(pattern), valid_up_to } }) }