From 72cfa0aea2627fc98dae8feff96aac5613dfc338 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Tue, 27 Dec 2022 21:18:00 +0100 Subject: [PATCH 1/3] Verify that spans point to char boundaries This makes invalid spans a lot easier to debug. A quick and unscientific benchmarking test revealed that the performance impact of this is small at most. --- compiler/rustc_span/src/lib.rs | 16 ++++++++--- compiler/rustc_span/src/span_encoding.rs | 36 +++++++++++++++++++++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 65702f76fdac7..43d73585aa327 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -142,10 +142,11 @@ pub fn set_session_globals_then(session_globals: &SessionGlobals, f: impl FnO SESSION_GLOBALS.set(session_globals, f) } -pub fn create_session_if_not_set_then(edition: Edition, f: F) -> R -where - F: FnOnce(&SessionGlobals) -> R, -{ + +pub fn create_session_if_not_set_then( + edition: Edition, + f: impl FnOnce(&SessionGlobals) -> R, +) -> R { if !SESSION_GLOBALS.is_set() { let session_globals = SessionGlobals::new(edition); SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f)) @@ -160,7 +161,14 @@ where { SESSION_GLOBALS.with(f) } +pub struct NotSet; + +#[inline] +pub fn try_with_session_globals(f: impl FnOnce(&SessionGlobals) -> R) -> Result { + if SESSION_GLOBALS.is_set() { Ok(SESSION_GLOBALS.with(f)) } else { Err(NotSet) } +} +#[inline] pub fn create_default_session_globals_then(f: impl FnOnce() -> R) -> R { create_session_globals_then(edition::DEFAULT_EDITION, f) } diff --git a/compiler/rustc_span/src/span_encoding.rs b/compiler/rustc_span/src/span_encoding.rs index e162695a13beb..c65e2bd0e6f18 100644 --- a/compiler/rustc_span/src/span_encoding.rs +++ b/compiler/rustc_span/src/span_encoding.rs @@ -1,6 +1,6 @@ use crate::def_id::{DefIndex, LocalDefId}; use crate::hygiene::SyntaxContext; -use crate::SPAN_TRACK; +use crate::{try_with_session_globals, NotSet, Pos, SPAN_TRACK}; use crate::{BytePos, SpanData}; use rustc_data_structures::fx::FxIndexSet; @@ -103,6 +103,40 @@ impl Span { ctxt: SyntaxContext, parent: Option, ) -> Self { + // Make sure that the byte positions are at char boundaries. + // Only do this if the session globals are set correctly, which they aren't in some unit tests. + if cfg!(debug_assertions) { + let _: Result<(), NotSet> = try_with_session_globals(|sess| { + let sm = sess.source_map.lock(); + if let Some(sm) = &*sm { + let offset = sm.lookup_byte_offset(lo); + if let Some(file) = &offset.sf.src { + // `is_char_boundary` already checks this, but asserting it seperately gives a better panic message. + assert!( + file.len() >= offset.pos.to_usize(), + "start of span is out of bounds" + ); + assert!( + file.is_char_boundary(offset.pos.to_usize()), + "start of span not on char boundary" + ); + } + + let offset = sm.lookup_byte_offset(hi); + if let Some(file) = &offset.sf.src { + assert!( + file.len() >= offset.pos.to_usize(), + "end of span is out of bounds" + ); + assert!( + file.is_char_boundary(offset.pos.to_usize()), + "end of span not on char boundary" + ); + } + } + }); + } + if lo > hi { std::mem::swap(&mut lo, &mut hi); } From b8e865d31c037802fc5158a636362e97dafdf5c7 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Wed, 28 Dec 2022 16:49:32 +0100 Subject: [PATCH 2/3] Fix `SourceMap::start_point` for empty spans When the span is empty, it doesn't really have a first character. Even worse, when it's empty at the end of the file, adding a byte offset will make it be out of bounds. So we just return the empty span in these cases. --- compiler/rustc_span/src/source_map.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 8253ffefcaa7d..349b310dceecf 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -824,7 +824,12 @@ impl SourceMap { } /// Returns a new span representing just the first character of the given span. + /// When the span is empty, the same empty span is returned. pub fn start_point(&self, sp: Span) -> Span { + if sp.is_empty() { + return sp; + } + let width = { let sp = sp.data(); let local_begin = self.lookup_byte_offset(sp.lo); From bc8aa22f166728f0608a41b68a2328c542a6ca45 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Fri, 12 Jan 2024 22:47:20 +0100 Subject: [PATCH 3/3] fmt --- compiler/rustc_span/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 43d73585aa327..1b0ab2dcb1056 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -142,7 +142,6 @@ pub fn set_session_globals_then(session_globals: &SessionGlobals, f: impl FnO SESSION_GLOBALS.set(session_globals, f) } - pub fn create_session_if_not_set_then( edition: Edition, f: impl FnOnce(&SessionGlobals) -> R,