Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unescaping cleanups #102347

Merged
merged 2 commits into from
Sep 28, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 12 additions & 15 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ where
// NOTE: Raw strings do not perform any explicit character escaping, here we
// only translate CRLF to LF and produce errors on bare CR.
Mode::RawStr | Mode::RawByteStr => {
- unescape_raw_str_or_byte_str(literal_text, mode, callback)
+ unescape_raw_str_or_raw_byte_str(literal_text, mode, callback)
}
}
}
Expand All @@ -105,7 +105,7 @@ pub fn unescape_byte_literal<F>(literal_text: &str, mode: Mode, callback: &mut F
where
F: FnMut(Range<usize>, Result<u8, EscapeError>),
{
- assert!(mode.is_bytes());
+ debug_assert!(mode.is_bytes());
unescape_literal(literal_text, mode, &mut |range, result| {
callback(range, result.map(byte_from_char));
})
Expand All @@ -129,7 +129,7 @@ pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
}

/// What kind of literal do we parse.
- #[derive(Debug, Clone, Copy)]
+ #[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
Char,
Str,
Expand All @@ -140,17 +140,13 @@ pub enum Mode {
}

impl Mode {
- pub fn in_single_quotes(self) -> bool {
+ pub fn in_double_quotes(self) -> bool {
match self {
- Mode::Char | Mode::Byte => true,
- Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => false,
+ Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
+ Mode::Char | Mode::Byte => false,
}
}

- pub fn in_double_quotes(self) -> bool {
- !self.in_single_quotes()
- }
-
pub fn is_bytes(self) -> bool {
match self {
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
Expand Down Expand Up @@ -184,7 +180,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {

let value = hi * 16 + lo;

- // For a byte literal verify that it is within ASCII range.
+ // For a non-byte literal verify that it is within ASCII range.
if !mode.is_bytes() && !is_ascii(value) {
return Err(EscapeError::OutOfRangeHexEscape);
}
Expand Down Expand Up @@ -263,6 +259,7 @@ fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
}

fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
+ debug_assert!(mode == Mode::Char || mode == Mode::Byte);
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match first_char {
'\\' => scan_escape(chars, mode),
Expand All @@ -282,7 +279,7 @@ fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
- assert!(mode.in_double_quotes());
+ debug_assert!(mode == Mode::Str || mode == Mode::ByteStr);
let initial_len = src.len();
let mut chars = src.chars();
while let Some(first_char) = chars.next() {
Expand Down Expand Up @@ -344,11 +341,11 @@ where
/// sequence of characters or errors.
/// NOTE: Raw strings do not perform any explicit character escaping, here we
/// only translate CRLF to LF and produce errors on bare CR.
- fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
+ fn unescape_raw_str_or_raw_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
- assert!(mode.in_double_quotes());
+ debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr);
let initial_len = literal_text.len();

let mut chars = literal_text.chars();
Expand All @@ -368,7 +365,7 @@ where

fn byte_from_char(c: char) -> u8 {
let res = c as u32;
- assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
+ debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
res as u8
}

Expand Down