diff --git a/Cargo.lock b/Cargo.lock index 5eeb855aacb87..b53d89ba88640 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3703,6 +3703,7 @@ dependencies = [ "rustc_fluent_macro", "rustc_hir", "rustc_index", + "rustc_lexer", "rustc_lint_defs", "rustc_macros", "rustc_serialize", diff --git a/compiler/rustc_errors/Cargo.toml b/compiler/rustc_errors/Cargo.toml index 06bae57638f3c..66b9adbead0b6 100644 --- a/compiler/rustc_errors/Cargo.toml +++ b/compiler/rustc_errors/Cargo.toml @@ -16,6 +16,7 @@ rustc_error_messages = { path = "../rustc_error_messages" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } +rustc_lexer = { path = "../rustc_lexer" } rustc_lint_defs = { path = "../rustc_lint_defs" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index ac2f91cdeb3fe..17908bca26afc 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -19,6 +19,7 @@ use derive_setters::Setters; use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet}; use rustc_data_structures::sync::{DynSend, IntoDynSyncSend, Lrc}; use rustc_error_messages::{FluentArgs, SpanLabel}; +use rustc_lexer; use rustc_lint_defs::pluralize; use rustc_span::hygiene::{ExpnKind, MacroKind}; use rustc_span::source_map::SourceMap; @@ -1698,9 +1699,14 @@ impl HumanEmitter { if let Some(source_string) = line.line_index.checked_sub(1).and_then(|l| file.get_line(l)) { + // Whitespace can only be removed (aka considered leading) + // if the lexer considers it whitespace. + // non-rustc_lexer::is_whitespace() chars are reported as an + // error (ex. no-break-spaces \u{a0}), and thus can't be considered + // for removal during error reporting. let leading_whitespace = source_string .chars() - .take_while(|c| c.is_whitespace()) + .take_while(|c| rustc_lexer::is_whitespace(*c)) .map(|c| { match c { // Tabs are displayed as 4 spaces @@ -1709,7 +1715,7 @@ impl HumanEmitter { } }) .sum(); - if source_string.chars().any(|c| !c.is_whitespace()) { + if source_string.chars().any(|c| !rustc_lexer::is_whitespace(c)) { whitespace_margin = min(whitespace_margin, leading_whitespace); } } diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.rs b/tests/ui/errors/emitter-overflow-bad-whitespace.rs new file mode 100644 index 0000000000000..d024d1f3921b2 --- /dev/null +++ b/tests/ui/errors/emitter-overflow-bad-whitespace.rs @@ -0,0 +1,11 @@ +// Invalid whitespace (not listed here: https://doc.rust-lang.org/reference/whitespace.html +// e.g. \u{a0}) before any other syntax on the line should not cause any integer overflow +// in the emitter, even when the terminal width causes the line to be truncated. +// +// issue #132918 + +//@ check-fail +//@ needs-rustc-debug-assertions +//@ compile-flags: --diagnostic-width=1 +                                        fn main() { return; } +//~^ ERROR unknown start of token: \u{a0} diff --git a/tests/ui/errors/emitter-overflow-bad-whitespace.stderr b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr new file mode 100644 index 0000000000000..8c0c097bcb9c4 --- /dev/null +++ b/tests/ui/errors/emitter-overflow-bad-whitespace.stderr @@ -0,0 +1,13 @@ +error: unknown start of token: \u{a0} + --> $DIR/emitter-overflow-bad-whitespace.rs:10:1 + | +LL |     ... + | ^ + | +help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not + | +LL |                                       fn main() { return; } + | + + +error: aborting due to 1 previous error +