Skip to content

Commit 4d6e6ed

Browse files
authored
Rollup merge of rust-lang#134366 - harrisonkaiser:no-break-space, r=davidtwco
Fix logical error with what text is considered whitespace. There appears to be a logical issue around what counts as leading whitespace. There is code which does a subtraction assuming that no errors will be reported inside the leading whitespace. However, we compute the length of that whitespace with std::char::is_whitespace and not rustc_lexer::is_whitespace. The former will include a no-break space while the latter will exclude it. We can only safely make the assumption that no errors will be reported in whitespace if it is all "Rust Standard" whitespace. Indeed, an error does occur in Unicode whitespace if it contains a no-break space. In that case the subtraction will cause an ICE (for a compiler in debug mode) as described in rust-lang#132918.
2 parents 8a1f803 + 1e33dd1 commit 4d6e6ed

File tree

5 files changed

+34
-2
lines changed

5 files changed

+34
-2
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3722,6 +3722,7 @@ dependencies = [
37223722
"rustc_fluent_macro",
37233723
"rustc_hir",
37243724
"rustc_index",
3725+
"rustc_lexer",
37253726
"rustc_lint_defs",
37263727
"rustc_macros",
37273728
"rustc_serialize",

compiler/rustc_errors/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" }
1616
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
1717
rustc_hir = { path = "../rustc_hir" }
1818
rustc_index = { path = "../rustc_index" }
19+
rustc_lexer = { path = "../rustc_lexer" }
1920
rustc_lint_defs = { path = "../rustc_lint_defs" }
2021
rustc_macros = { path = "../rustc_macros" }
2122
rustc_serialize = { path = "../rustc_serialize" }

compiler/rustc_errors/src/emitter.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use derive_setters::Setters;
1919
use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet};
2020
use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc};
2121
use rustc_error_messages::{FluentArgs, SpanLabel};
22+
use rustc_lexer;
2223
use rustc_lint_defs::pluralize;
2324
use rustc_span::hygiene::{ExpnKind, MacroKind};
2425
use rustc_span::source_map::SourceMap;
@@ -1698,9 +1699,14 @@ impl HumanEmitter {
16981699
if let Some(source_string) =
16991700
line.line_index.checked_sub(1).and_then(|l| file.get_line(l))
17001701
{
1702+
// Whitespace can only be removed (aka considered leading)
1703+
// if the lexer considers it whitespace.
1704+
// non-rustc_lexer::is_whitespace() chars are reported as an
1705+
// error (ex. no-break-spaces \u{a0}), and thus can't be considered
1706+
// for removal during error reporting.
17011707
let leading_whitespace = source_string
17021708
.chars()
1703-
.take_while(|c| c.is_whitespace())
1709+
.take_while(|c| rustc_lexer::is_whitespace(*c))
17041710
.map(|c| {
17051711
match c {
17061712
// Tabs are displayed as 4 spaces
@@ -1709,7 +1715,7 @@ impl HumanEmitter {
17091715
}
17101716
})
17111717
.sum();
1712-
if source_string.chars().any(|c| !c.is_whitespace()) {
1718+
if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) {
17131719
whitespace_margin = min(whitespace_margin, leading_whitespace);
17141720
}
17151721
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Invalid whitespace (not listed here: https://doc.rust-lang.org/reference/whitespace.html
2+
// e.g. \u{a0}) before any other syntax on the line should not cause any integer overflow
3+
// in the emitter, even when the terminal width causes the line to be truncated.
4+
//
5+
// issue #132918
6+
7+
//@ check-fail
8+
//@ needs-rustc-debug-assertions
9+
//@ compile-flags: --diagnostic-width=1
10+
                                        fn main() { return; }
11+
//~^ ERROR unknown start of token: \u{a0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
error: unknown start of token: \u{a0}
2+
--> $DIR/emitter-overflow-bad-whitespace.rs:10:1
3+
|
4+
LL |     ...
5+
| ^
6+
|
7+
help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
8+
|
9+
LL |                                       fn main() { return; }
10+
| +
11+
12+
error: aborting due to 1 previous error
13+

0 commit comments

Comments
 (0)