Skip to content

Commit aa9e6aa

Browse files
authored
Rollup merge of #87596 - jesyspa:issue-87318-hidden-whitespace, r=estebank
Add warning when whitespace is not skipped after an escaped newline.
Fixes issue #87318; also simplifies issue #87319.
* Add support to the lexer for emitting warnings as well as errors.
* Emit a warning when a string literal contains an escaped newline but (some of) the whitespace on the next line is not skipped because it is non-ASCII.
2 parents 5e2655d + 5d59b44 commit aa9e6aa

File tree

4 files changed

+75
-8
lines changed

4 files changed

+75
-8
lines changed

compiler/rustc_ast/src/util/literal.rs

+20-4
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@ impl LitKind {
6363
unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
6464
match unescaped_char {
6565
Ok(c) => buf.push(c),
66-
Err(_) => error = Err(LitError::LexerError),
66+
Err(err) => {
67+
if err.is_fatal() {
68+
error = Err(LitError::LexerError);
69+
}
70+
}
6771
}
6872
});
6973
error?;
@@ -83,7 +87,11 @@ impl LitKind {
8387
unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
8488
match unescaped_char {
8589
Ok(c) => buf.push(c),
86-
Err(_) => error = Err(LitError::LexerError),
90+
Err(err) => {
91+
if err.is_fatal() {
92+
error = Err(LitError::LexerError);
93+
}
94+
}
8795
}
8896
});
8997
error?;
@@ -100,7 +108,11 @@ impl LitKind {
100108
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
101109
match unescaped_byte {
102110
Ok(c) => buf.push(c),
103-
Err(_) => error = Err(LitError::LexerError),
111+
Err(err) => {
112+
if err.is_fatal() {
113+
error = Err(LitError::LexerError);
114+
}
115+
}
104116
}
105117
});
106118
error?;
@@ -114,7 +126,11 @@ impl LitKind {
114126
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
115127
match unescaped_byte {
116128
Ok(c) => buf.push(c),
117-
Err(_) => error = Err(LitError::LexerError),
129+
Err(err) => {
130+
if err.is_fatal() {
131+
error = Err(LitError::LexerError);
132+
}
133+
}
118134
}
119135
});
120136
error?;

compiler/rustc_lexer/src/unescape.rs

+30-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::str::Chars;
77
#[cfg(test)]
88
mod tests;
99

10-
/// Errors that can occur during string unescaping.
10+
/// Errors and warnings that can occur during string unescaping.
1111
#[derive(Debug, PartialEq, Eq)]
1212
pub enum EscapeError {
1313
/// Expected 1 char, but 0 were found.
@@ -56,6 +56,20 @@ pub enum EscapeError {
5656
NonAsciiCharInByte,
5757
/// Non-ascii character in byte string literal.
5858
NonAsciiCharInByteString,
59+
60+
/// After a line ending with '\', the next line contains whitespace
61+
/// characters that are not skipped.
62+
UnskippedWhitespaceWarning,
63+
}
64+
65+
impl EscapeError {
66+
/// Returns true for actual errors, as opposed to warnings.
67+
pub fn is_fatal(&self) -> bool {
68+
match self {
69+
EscapeError::UnskippedWhitespaceWarning => false,
70+
_ => true,
71+
}
72+
}
5973
}
6074

6175
/// Takes the contents of a literal (without quotes) and produces a
@@ -283,7 +297,7 @@ where
283297
// if unescaped '\' character is followed by '\n'.
284298
// For details see [Rust language reference]
285299
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
286-
skip_ascii_whitespace(&mut chars);
300+
skip_ascii_whitespace(&mut chars, start, callback);
287301
continue;
288302
}
289303
_ => scan_escape(first_char, &mut chars, mode),
@@ -297,13 +311,25 @@ where
297311
callback(start..end, unescaped_char);
298312
}
299313

300-
fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
314+
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
315+
where
316+
F: FnMut(Range<usize>, Result<char, EscapeError>),
317+
{
301318
let str = chars.as_str();
302319
let first_non_space = str
303320
.bytes()
304321
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
305322
.unwrap_or(str.len());
306-
*chars = str[first_non_space..].chars()
323+
let tail = &str[first_non_space..];
324+
if let Some(c) = tail.chars().nth(0) {
325+
// For error reporting, we would like the span to contain the character that was not
326+
// skipped. The +1 is necessary to account for the leading \ that started the escape.
327+
let end = start + first_non_space + c.len_utf8() + 1;
328+
if c.is_whitespace() {
329+
callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
330+
}
331+
}
332+
*chars = tail.chars();
307333
}
308334
}
309335

compiler/rustc_lexer/src/unescape/tests.rs

+19
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,25 @@ fn test_unescape_char_good() {
9898
check(r"\u{1F63b}", '😻');
9999
}
100100

101+
#[test]
102+
fn test_unescape_str_warn() {
103+
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
104+
let mut unescaped = Vec::with_capacity(literal.len());
105+
unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
106+
assert_eq!(unescaped, expected);
107+
}
108+
109+
check(
110+
"\\\n \u{a0} x",
111+
&[
112+
(0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
113+
(3..5, Ok('\u{a0}')),
114+
(5..6, Ok(' ')),
115+
(6..7, Ok('x')),
116+
],
117+
);
118+
}
119+
101120
#[test]
102121
fn test_unescape_str_good() {
103122
fn check(literal_text: &str, expected: &str) {

compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

+6
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,12 @@ pub(crate) fn emit_unescape_error(
253253
let msg = "invalid trailing slash in literal";
254254
handler.struct_span_err(span, msg).span_label(span, msg).emit();
255255
}
256+
EscapeError::UnskippedWhitespaceWarning => {
257+
let (c, char_span) = last_char();
258+
let msg =
259+
format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
260+
handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
261+
}
256262
}
257263
}
258264

0 commit comments

Comments
 (0)