From a03fbfe2ff3e7dd03af42d337b11552e782e2dc4 Mon Sep 17 00:00:00 2001 From: Anton Golov Date: Sat, 31 Jul 2021 20:35:37 +0200 Subject: [PATCH 1/4] Warn when an escaped newline skips multiple lines --- compiler/rustc_lexer/src/unescape.rs | 9 +++++++++ compiler/rustc_lexer/src/unescape/tests.rs | 5 +++++ .../rustc_parse/src/lexer/unescape_error_reporting.rs | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 9a96c03cd3c80..6550eed39583c 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -60,6 +60,9 @@ pub enum EscapeError { /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. UnskippedWhitespaceWarning, + + /// After a line ending with '\', multiple lines are skipped. + MultipleSkippedLinesWarning, } impl EscapeError { @@ -67,6 +70,7 @@ impl EscapeError { pub fn is_fatal(&self) -> bool { match self { EscapeError::UnskippedWhitespaceWarning => false, + EscapeError::MultipleSkippedLinesWarning => false, _ => true, } } @@ -320,6 +324,11 @@ where .bytes() .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') .unwrap_or(str.len()); + if str[1..first_non_space].contains('\n') { + // The +1 accounts for the escaping slash. + let end = start + first_non_space + 1; + callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); + } let tail = &str[first_non_space..]; if let Some(c) = tail.chars().nth(0) { // For error reporting, we would like the span to contain the character that was not diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 1f4dbb20f4e98..fa61554afde6c 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -106,6 +106,10 @@ fn test_unescape_str_warn() { assert_eq!(unescaped, expected); } + // Check we can handle escaped newlines at the end of a file. + check("\\\n", &[]); + check("\\\n ", &[]); + check( "\\\n \u{a0} x", &[ @@ -115,6 +119,7 @@ fn test_unescape_str_warn() { (6..7, Ok('x')), ], ); + check("\\\n \n x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]); } #[test] diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 4e95cdc0efa5f..aa6b424ce2b57 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -280,6 +280,11 @@ pub(crate) fn emit_unescape_error( format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); } + EscapeError::MultipleSkippedLinesWarning => { + let msg = "multiple lines skipped by escaped newline"; + let bottom_msg = "skipping everything up to and including this point"; + handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit(); + } } } From 2dff700c4ff3fce32104f614eff8396ef5b0e39a Mon Sep 17 00:00:00 2001 From: Anton Golov Date: Sat, 31 Jul 2021 23:09:19 +0200 Subject: [PATCH 2/4] Update format string tests to explicitly escape multiple newlines From what I can tell, the goal of the tests is to ensure that the error formatting is correct. I think this is still being tested as intended after this change. --- src/test/ui/fmt/format-string-error-2.rs | 6 +++--- src/test/ui/fmt/format-string-error-2.stderr | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/ui/fmt/format-string-error-2.rs b/src/test/ui/fmt/format-string-error-2.rs index 69fed2cb69ad8..1f7f0d8f6be6a 100644 --- a/src/test/ui/fmt/format-string-error-2.rs +++ b/src/test/ui/fmt/format-string-error-2.rs @@ -5,7 +5,7 @@ fn main() { a"); //~^ ERROR invalid format string format!("{ \ - + \ b"); //~^ ERROR invalid format string format!(r#"{ \ @@ -38,12 +38,12 @@ fn main() { { \ \ b \ - + \ "); //~^^^ ERROR invalid format string format!(r#" raw { \ - + \ c"#); //~^^^ ERROR invalid format string format!(r#" diff --git a/src/test/ui/fmt/format-string-error-2.stderr b/src/test/ui/fmt/format-string-error-2.stderr index c421fe49ef0a4..76cdfbb93bf24 100644 --- a/src/test/ui/fmt/format-string-error-2.stderr +++ b/src/test/ui/fmt/format-string-error-2.stderr @@ -19,7 +19,7 @@ error: invalid format string: expected `'}'`, found `'b'` | LL | format!("{ \ | - because of this opening brace -LL | +LL | \ LL | b"); | ^ expected `}` in format string | From efe069c599d0d0ed96b8e436bed3850b8ba055ae Mon Sep 17 00:00:00 2001 From: Anton Golov Date: Wed, 11 Aug 2021 12:13:24 +0200 Subject: [PATCH 3/4] Add UI tests for string escape warnings. --- src/test/ui/str/str-escape.rs | 11 +++++++++++ src/test/ui/str/str-escape.stderr | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 src/test/ui/str/str-escape.rs create mode 100644 src/test/ui/str/str-escape.stderr diff --git a/src/test/ui/str/str-escape.rs b/src/test/ui/str/str-escape.rs new file mode 100644 index 0000000000000..0264632fd24a1 --- /dev/null +++ b/src/test/ui/str/str-escape.rs @@ -0,0 +1,11 @@ +// check-pass +fn main() { + let s = "\ + + "; + //~^^^ WARNING multiple lines skipped by escaped newline + let s = "foo\ +   bar + "; + //~^^^ WARNING non-ASCII whitespace symbol '\u{a0}' is not skipped +} diff --git a/src/test/ui/str/str-escape.stderr b/src/test/ui/str/str-escape.stderr new file mode 100644 index 0000000000000..b2501f1a2145f --- /dev/null +++ b/src/test/ui/str/str-escape.stderr @@ -0,0 +1,21 @@ +warning: multiple lines skipped by escaped newline + --> $DIR/str-escape.rs:3:14 + | +LL | let s = "\ + | ______________^ +LL | | +LL | | "; + | |_____________^ skipping everything up to and including this point + +warning: non-ASCII whitespace symbol '\u{a0}' is not skipped + --> $DIR/str-escape.rs:7:17 + | +LL | let s = "foo\ + | _________________^ +LL | |   bar + | | ^ non-ASCII whitespace symbol '\u{a0}' is not skipped + | |___| + | + +warning: 2 warnings emitted + From 07aacf53c5105435358e7993b5f8cc96ef454c52 Mon Sep 17 00:00:00 2001 From: Anton Golov Date: Wed, 11 Aug 2021 13:57:28 +0200 Subject: [PATCH 4/4] Renamed variable str -> tail for clarity --- compiler/rustc_lexer/src/unescape.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 6550eed39583c..b970c9e4911fa 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -319,17 +319,17 @@ where where F: FnMut(Range, Result), { - let str = chars.as_str(); - let first_non_space = str + let tail = chars.as_str(); + let first_non_space = tail .bytes() .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(str.len()); - if str[1..first_non_space].contains('\n') { + .unwrap_or(tail.len()); + if tail[1..first_non_space].contains('\n') { // The +1 accounts for the escaping slash. let end = start + first_non_space + 1; callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning)); } - let tail = &str[first_non_space..]; + let tail = &tail[first_non_space..]; if let Some(c) = tail.chars().nth(0) { // For error reporting, we would like the span to contain the character that was not // skipped. The +1 is necessary to account for the leading \ that started the escape.