From c91c64708b1acf3c262a2a6d6551f4bc0acfa656 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 27 Sep 2022 15:25:34 +1000 Subject: [PATCH 1/2] Fix an incorrect comment. If a `\x` escape occurs in a non-byte literals (e.g. char literal, string literal), it must be <= 0xff. --- compiler/rustc_lexer/src/unescape.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 3da6bc14622a0..25d5f2772de40 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -184,7 +184,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result { let value = hi * 16 + lo; - // For a byte literal verify that it is within ASCII range. + // For a non-byte literal verify that it is within ASCII range. if !mode.is_bytes() && !is_ascii(value) { return Err(EscapeError::OutOfRangeHexEscape); } From 94cb5e86ea50ec878b1a5de9b0b64bb35e8027e3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 27 Sep 2022 16:32:22 +1000 Subject: [PATCH 2/2] Small cleanups in unescaping code. - Rename `unescape_raw_str_or_raw_byte_str` as `unescape_raw_str_or_byte_str`, which is more accurate. - Remove the unused `Mode::in_single_quotes` method. - Make some assertions more precise, and add a missing one to `unescape_char_or_byte`. - Change all the assertions to `debug_assert!`, because this code is reasonably hot, and the assertions aren't required for memory safety, and any violations are likely to be sufficiently obvious that normal tests will trigger them. --- compiler/rustc_lexer/src/unescape.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 25d5f2772de40..8f64b5f5158e4 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -93,7 +93,7 @@ where // NOTE: Raw strings do not perform any explicit character escaping, here we // only translate CRLF to LF and produce errors on bare CR. Mode::RawStr | Mode::RawByteStr => { - unescape_raw_str_or_byte_str(literal_text, mode, callback) + unescape_raw_str_or_raw_byte_str(literal_text, mode, callback) } } } @@ -105,7 +105,7 @@ pub fn unescape_byte_literal(literal_text: &str, mode: Mode, callback: &mut F where F: FnMut(Range, Result), { - assert!(mode.is_bytes()); + debug_assert!(mode.is_bytes()); unescape_literal(literal_text, mode, &mut |range, result| { callback(range, result.map(byte_from_char)); }) @@ -129,7 +129,7 @@ pub fn unescape_byte(literal_text: &str) -> Result { } /// What kind of literal do we parse. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum Mode { Char, Str, @@ -140,17 +140,13 @@ pub enum Mode { } impl Mode { - pub fn in_single_quotes(self) -> bool { + pub fn in_double_quotes(self) -> bool { match self { - Mode::Char | Mode::Byte => true, - Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => false, + Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true, + Mode::Char | Mode::Byte => false, } } - pub fn in_double_quotes(self) -> bool { - !self.in_single_quotes() - } - pub fn is_bytes(self) -> bool { match self { Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true, @@ -263,6 +259,7 @@ fn ascii_check(first_char: char, mode: Mode) -> Result { } fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { + debug_assert!(mode == Mode::Char || mode == Mode::Byte); let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; let res = match first_char { '\\' => scan_escape(chars, mode), @@ -282,7 +279,7 @@ fn unescape_str_or_byte_str(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { - assert!(mode.in_double_quotes()); + debug_assert!(mode == Mode::Str || mode == Mode::ByteStr); let initial_len = src.len(); let mut chars = src.chars(); while let Some(first_char) = chars.next() { @@ -344,11 +341,11 @@ where /// sequence of characters or errors. /// NOTE: Raw strings do not perform any explicit character escaping, here we /// only translate CRLF to LF and produce errors on bare CR. -fn unescape_raw_str_or_byte_str(literal_text: &str, mode: Mode, callback: &mut F) +fn unescape_raw_str_or_raw_byte_str(literal_text: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { - assert!(mode.in_double_quotes()); + debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr); let initial_len = literal_text.len(); let mut chars = literal_text.chars(); @@ -368,7 +365,7 @@ where fn byte_from_char(c: char) -> u8 { let res = c as u32; - assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr"); + debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr"); res as u8 }