From 5d59b4412e71298e6e44b55afbfaa9dd86aee590 Mon Sep 17 00:00:00 2001 From: Anton Golov Date: Fri, 30 Jul 2021 16:09:33 +0200 Subject: [PATCH] Add warning when whitespace is not skipped after an escaped newline. --- compiler/rustc_ast/src/util/literal.rs | 24 ++++++++++--- compiler/rustc_lexer/src/unescape.rs | 34 ++++++++++++++++--- compiler/rustc_lexer/src/unescape/tests.rs | 19 +++++++++++ .../src/lexer/unescape_error_reporting.rs | 6 ++++ 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 2124f1efb990c..9c6ad47427d21 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -63,7 +63,11 @@ impl LitKind { unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } } }); error?; @@ -83,7 +87,11 @@ impl LitKind { unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } } }); error?; @@ -100,7 +108,11 @@ impl LitKind { unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| { match unescaped_byte { Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } } }); error?; @@ -114,7 +126,11 @@ impl LitKind { unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| { match unescaped_byte { Ok(c) => buf.push(c), - Err(_) => error = Err(LitError::LexerError), + Err(err) => { + if err.is_fatal() { + error = Err(LitError::LexerError); + } + } } }); error?; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 
b4dd0fc2449ec..9a96c03cd3c80 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -7,7 +7,7 @@ use std::str::Chars; #[cfg(test)] mod tests; -/// Errors that can occur during string unescaping. +/// Errors and warnings that can occur during string unescaping. #[derive(Debug, PartialEq, Eq)] pub enum EscapeError { /// Expected 1 char, but 0 were found. @@ -56,6 +56,20 @@ pub enum EscapeError { NonAsciiCharInByte, /// Non-ascii character in byte string literal. NonAsciiCharInByteString, + + /// After a line ending with '\', the next line contains whitespace + /// characters that are not skipped. + UnskippedWhitespaceWarning, +} + +impl EscapeError { + /// Returns true for actual errors, as opposed to warnings. + pub fn is_fatal(&self) -> bool { + match self { + EscapeError::UnskippedWhitespaceWarning => false, + _ => true, + } + } } /// Takes a contents of a literal (without quotes) and produces a @@ -283,7 +297,7 @@ where // if unescaped '\' character is followed by '\n'. // For details see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars); + skip_ascii_whitespace(&mut chars, start, callback); continue; } _ => scan_escape(first_char, &mut chars, mode), @@ -297,13 +311,25 @@ where callback(start..end, unescaped_char); } - fn skip_ascii_whitespace(chars: &mut Chars<'_>) { + fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F) + where + F: FnMut(Range<usize>, Result<char, EscapeError>), + { let str = chars.as_str(); let first_non_space = str .bytes() .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') .unwrap_or(str.len()); - *chars = str[first_non_space..].chars() + let tail = &str[first_non_space..]; + if let Some(c) = tail.chars().nth(0) { + // For error reporting, we would like the span to contain the character that was not + // skipped. The +1 is necessary to account for the leading \ that started the escape. 
+ let end = start + first_non_space + c.len_utf8() + 1; + if c.is_whitespace() { + callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning)); + } + } + *chars = tail.chars(); } } diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index f2b751a78f27f..1f4dbb20f4e98 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -98,6 +98,25 @@ fn test_unescape_char_good() { check(r"\u{1F63b}", '😻'); } +#[test] +fn test_unescape_str_warn() { + fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) { + let mut unescaped = Vec::with_capacity(literal.len()); + unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); + assert_eq!(unescaped, expected); + } + + check( + "\\\n \u{a0} x", + &[ + (0..5, Err(EscapeError::UnskippedWhitespaceWarning)), + (3..5, Ok('\u{a0}')), + (5..6, Ok(' ')), + (6..7, Ok('x')), + ], + ); +} + #[test] fn test_unescape_str_good() { fn check(literal_text: &str, expected: &str) { diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index a580f0c55d0e3..1c5be61130b61 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -253,6 +253,12 @@ pub(crate) fn emit_unescape_error( let msg = "invalid trailing slash in literal"; handler.struct_span_err(span, msg).span_label(span, msg).emit(); } + EscapeError::UnskippedWhitespaceWarning => { + let (c, char_span) = last_char(); + let msg = + format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); + handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); + } } }