From 5cab8ae4a4a033acb25d629871deb24db19b638b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 19:02:01 +0200 Subject: [PATCH 01/14] miri: make vtable addresses not globally unique --- .../rustc_const_eval/src/interpret/machine.rs | 18 ++- .../rustc_const_eval/src/interpret/memory.rs | 5 +- .../rustc_const_eval/src/interpret/place.rs | 3 +- .../rustc_const_eval/src/interpret/traits.rs | 4 +- .../rustc_middle/src/mir/interpret/mod.rs | 145 ++++++------------ compiler/rustc_middle/src/ty/context.rs | 4 +- compiler/rustc_middle/src/ty/vtable.rs | 11 +- .../src/build/expr/as_constant.rs | 6 +- src/tools/miri/src/lib.rs | 1 + src/tools/miri/src/machine.rs | 47 +++++- src/tools/miri/tests/pass/dyn-traits.rs | 10 ++ .../miri/tests/pass/function_pointers.rs | 3 +- src/tools/miri/tests/pass/rc.rs | 3 +- 13 files changed, 148 insertions(+), 112 deletions(-) diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index bdce8253b2e76..1aa0d6af9ec21 100644 --- a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -19,7 +19,7 @@ use rustc_target::spec::abi::Abi as CallAbi; use super::{ throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation, ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy, - MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, + MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT, }; /// Data returned by [`Machine::after_stack_pop`], and consumed by @@ -575,6 +575,14 @@ pub trait Machine<'tcx>: Sized { { eval(ecx, val, span, layout) } + + /// Returns the salt to be used for a deduplicated global alloation. + /// If the allocation is for a function, the instance is provided as well + /// (this lets Miri ensure unique addresses for some functions). + fn get_global_alloc_salt( + ecx: &InterpCx<'tcx, Self>, + instance: Option>, + ) -> usize; } /// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines @@ -677,4 +685,12 @@ pub macro compile_time_machine(<$tcx: lifetime>) { let (prov, offset) = ptr.into_parts(); Some((prov.alloc_id(), offset, prov.immutable())) } + + #[inline(always)] + fn get_global_alloc_salt( + _ecx: &InterpCx<$tcx, Self>, + _instance: Option>, + ) -> usize { + CTFE_ALLOC_SALT + } } diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 2e5d0ae773654..910aec9b8e1de 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -195,7 +195,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { pub fn fn_ptr(&mut self, fn_val: FnVal<'tcx, M::ExtraFnVal>) -> Pointer { let id = match fn_val { - FnVal::Instance(instance) => self.tcx.reserve_and_set_fn_alloc(instance), + FnVal::Instance(instance) => { + let salt = M::get_global_alloc_salt(self, Some(instance)); + self.tcx.reserve_and_set_fn_alloc(instance, salt) + } FnVal::Other(extra) => { // FIXME(RalfJung): Should we have a cache here? let id = self.tcx.reserve_alloc_id(); diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs index 470a62026b943..2afdd02c88035 100644 --- a/compiler/rustc_const_eval/src/interpret/place.rs +++ b/compiler/rustc_const_eval/src/interpret/place.rs @@ -1008,7 +1008,8 @@ where // Use cache for immutable strings. let ptr = if mutbl.is_not() { // Use dedup'd allocation function. - let id = tcx.allocate_bytes_dedup(str.as_bytes()); + let salt = M::get_global_alloc_salt(self, None); + let id = tcx.allocate_bytes_dedup(str.as_bytes(), salt); // Turn untagged "global" pointers (obtained via `tcx`) into the machine pointer to the allocation. M::adjust_alloc_root_pointer(&self, Pointer::from(id), Some(kind))? diff --git a/compiler/rustc_const_eval/src/interpret/traits.rs b/compiler/rustc_const_eval/src/interpret/traits.rs index fb50661b8263d..cd4faf06655fe 100644 --- a/compiler/rustc_const_eval/src/interpret/traits.rs +++ b/compiler/rustc_const_eval/src/interpret/traits.rs @@ -28,7 +28,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { ensure_monomorphic_enough(*self.tcx, ty)?; ensure_monomorphic_enough(*self.tcx, poly_trait_ref)?; - let vtable_symbolic_allocation = self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref); + let salt = M::get_global_alloc_salt(self, None); + let vtable_symbolic_allocation = + self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref, salt); let vtable_ptr = self.global_root_pointer(Pointer::from(vtable_symbolic_allocation))?; Ok(vtable_ptr.into()) } diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs index 1851a61d7533c..91e71c12cae45 100644 --- a/compiler/rustc_middle/src/mir/interpret/mod.rs +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -13,7 +13,6 @@ use std::num::NonZero; use std::{fmt, io}; use rustc_ast::LitKind; -use rustc_attr::InlineAttr; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lock; use rustc_errors::ErrorGuaranteed; @@ -46,7 +45,7 @@ pub use self::pointer::{CtfeProvenance, Pointer, PointerArithmetic, Provenance}; pub use self::value::Scalar; use crate::mir; use crate::ty::codec::{TyDecoder, TyEncoder}; -use crate::ty::{self, GenericArgKind, Instance, Ty, TyCtxt}; +use crate::ty::{self, Instance, Ty, TyCtxt}; /// Uniquely identifies one of the following: /// - A constant @@ -126,11 +125,10 @@ pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder>>( AllocDiscriminant::Alloc.encode(encoder); alloc.encode(encoder); } - GlobalAlloc::Function { instance, unique } => { + GlobalAlloc::Function { instance } => { trace!("encoding {:?} with {:#?}", alloc_id, instance); AllocDiscriminant::Fn.encode(encoder); instance.encode(encoder); - unique.encode(encoder); } GlobalAlloc::VTable(ty, poly_trait_ref) => { trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id); @@ -219,38 +217,32 @@ impl<'s> AllocDecodingSession<'s> { } // Now decode the actual data. - let alloc_id = decoder.with_position(pos, |decoder| { - match alloc_kind { - AllocDiscriminant::Alloc => { - trace!("creating memory alloc ID"); - let alloc = as Decodable<_>>::decode(decoder); - trace!("decoded alloc {:?}", alloc); - decoder.interner().reserve_and_set_memory_alloc(alloc) - } - AllocDiscriminant::Fn => { - trace!("creating fn alloc ID"); - let instance = ty::Instance::decode(decoder); - trace!("decoded fn alloc instance: {:?}", instance); - let unique = bool::decode(decoder); - // Here we cannot call `reserve_and_set_fn_alloc` as that would use a query, which - // is not possible in this context. That's why the allocation stores - // whether it is unique or not. - decoder.interner().reserve_and_set_fn_alloc_internal(instance, unique) - } - AllocDiscriminant::VTable => { - trace!("creating vtable alloc ID"); - let ty = as Decodable>::decode(decoder); - let poly_trait_ref = - > as Decodable>::decode(decoder); - trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}"); - decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref) - } - AllocDiscriminant::Static => { - trace!("creating extern static alloc ID"); - let did = >::decode(decoder); - trace!("decoded static def-ID: {:?}", did); - decoder.interner().reserve_and_set_static_alloc(did) - } + let alloc_id = decoder.with_position(pos, |decoder| match alloc_kind { + AllocDiscriminant::Alloc => { + trace!("creating memory alloc ID"); + let alloc = as Decodable<_>>::decode(decoder); + trace!("decoded alloc {:?}", alloc); + decoder.interner().reserve_and_set_memory_alloc(alloc) + } + AllocDiscriminant::Fn => { + trace!("creating fn alloc ID"); + let instance = ty::Instance::decode(decoder); + trace!("decoded fn alloc instance: {:?}", instance); + decoder.interner().reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT) + } + AllocDiscriminant::VTable => { + trace!("creating vtable alloc ID"); + let ty = as Decodable>::decode(decoder); + let poly_trait_ref = + > as Decodable>::decode(decoder); + trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}"); + decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref, CTFE_ALLOC_SALT) + } + AllocDiscriminant::Static => { + trace!("creating extern static alloc ID"); + let did = >::decode(decoder); + trace!("decoded static def-ID: {:?}", did); + decoder.interner().reserve_and_set_static_alloc(did) } }); @@ -265,12 +257,7 @@ impl<'s> AllocDecodingSession<'s> { #[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)] pub enum GlobalAlloc<'tcx> { /// The alloc ID is used as a function pointer. - Function { - instance: Instance<'tcx>, - /// Stores whether this instance is unique, i.e. all pointers to this function use the same - /// alloc ID. - unique: bool, - }, + Function { instance: Instance<'tcx> }, /// This alloc ID points to a symbolic (not-reified) vtable. VTable(Ty<'tcx>, Option>), /// The alloc ID points to a "lazy" static variable that did not get computed (yet). @@ -323,14 +310,17 @@ impl<'tcx> GlobalAlloc<'tcx> { } } +pub const CTFE_ALLOC_SALT: usize = 0; + pub(crate) struct AllocMap<'tcx> { /// Maps `AllocId`s to their corresponding allocations. alloc_map: FxHashMap>, - /// Used to ensure that statics and functions only get one associated `AllocId`. - // - // FIXME: Should we just have two separate dedup maps for statics and functions each? - dedup: FxHashMap, AllocId>, + /// Used to deduplicate global allocations: functions, vtables, string literals, ... + /// + /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating + /// the actual guarantees. + dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>, /// The `AllocId` to assign to the next requested ID. /// Always incremented; never gets smaller. @@ -368,74 +358,40 @@ impl<'tcx> TyCtxt<'tcx> { /// Reserves a new ID *if* this allocation has not been dedup-reserved before. /// Should not be used for mutable memory. - fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId { + fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId { let mut alloc_map = self.alloc_map.lock(); if let GlobalAlloc::Memory(mem) = alloc { if mem.inner().mutability.is_mut() { bug!("trying to dedup-reserve mutable memory"); } } - if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) { + let alloc_salt = (alloc, salt); + if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) { return alloc_id; } let id = alloc_map.reserve(); - debug!("creating alloc {alloc:?} with id {id:?}"); - alloc_map.alloc_map.insert(id, alloc.clone()); - alloc_map.dedup.insert(alloc, id); + debug!("creating alloc {:?} with id {id:?}", alloc_salt.0); + alloc_map.alloc_map.insert(id, alloc_salt.0.clone()); + alloc_map.dedup.insert(alloc_salt, id); id } /// Generates an `AllocId` for a memory allocation. If the exact same memory has been /// allocated before, this will return the same `AllocId`. - pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>) -> AllocId { - self.reserve_and_set_dedup(GlobalAlloc::Memory(mem)) + pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>, salt: usize) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::Memory(mem), salt) } /// Generates an `AllocId` for a static or return a cached one in case this function has been /// called on the same static before. pub fn reserve_and_set_static_alloc(self, static_id: DefId) -> AllocId { - self.reserve_and_set_dedup(GlobalAlloc::Static(static_id)) - } - - /// Generates an `AllocId` for a function. The caller must already have decided whether this - /// function obtains a unique AllocId or gets de-duplicated via the cache. - fn reserve_and_set_fn_alloc_internal(self, instance: Instance<'tcx>, unique: bool) -> AllocId { - let alloc = GlobalAlloc::Function { instance, unique }; - if unique { - // Deduplicate. - self.reserve_and_set_dedup(alloc) - } else { - // Get a fresh ID. - let mut alloc_map = self.alloc_map.lock(); - let id = alloc_map.reserve(); - alloc_map.alloc_map.insert(id, alloc); - id - } + let salt = 0; // Statics have a guaranteed unique address, no salt added. + self.reserve_and_set_dedup(GlobalAlloc::Static(static_id), salt) } - /// Generates an `AllocId` for a function. Depending on the function type, - /// this might get deduplicated or assigned a new ID each time. - pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>) -> AllocId { - // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated - // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be - // duplicated across crates. We thus generate a new `AllocId` for every mention of a - // function. This means that `main as fn() == main as fn()` is false, while `let x = main as - // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify - // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will - // actually emit duplicate functions. It does that when they have non-lifetime generics, or - // when they can be inlined. All other functions are given a unique address. - // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied - // upon for anything. But if we don't do this, backtraces look terrible. - let is_generic = instance - .args - .into_iter() - .any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_))); - let can_be_inlined = match self.codegen_fn_attrs(instance.def_id()).inline { - InlineAttr::Never => false, - _ => true, - }; - let unique = !is_generic && !can_be_inlined; - self.reserve_and_set_fn_alloc_internal(instance, unique) + /// Generates an `AllocId` for a function. Will get deduplicated. + pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>, salt: usize) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::Function { instance }, salt) } /// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated. @@ -443,8 +399,9 @@ impl<'tcx> TyCtxt<'tcx> { self, ty: Ty<'tcx>, poly_trait_ref: Option>, + salt: usize, ) -> AllocId { - self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref)) + self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref), salt) } /// Interns the `Allocation` and return a new `AllocId`, even if there's already an identical diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 8f8fd09c9e4d9..07fed7b17ad33 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1438,11 +1438,11 @@ impl<'tcx> TyCtxt<'tcx> { /// Allocates a read-only byte or string literal for `mir::interpret`. /// Returns the same `AllocId` if called again with the same bytes. - pub fn allocate_bytes_dedup(self, bytes: &[u8]) -> interpret::AllocId { + pub fn allocate_bytes_dedup(self, bytes: &[u8], salt: usize) -> interpret::AllocId { // Create an allocation that just contains these bytes. let alloc = interpret::Allocation::from_bytes_byte_aligned_immutable(bytes); let alloc = self.mk_const_alloc(alloc); - self.reserve_and_set_memory_dedup(alloc) + self.reserve_and_set_memory_dedup(alloc, salt) } /// Returns a range of the start/end indices specified with the diff --git a/compiler/rustc_middle/src/ty/vtable.rs b/compiler/rustc_middle/src/ty/vtable.rs index f38f27b84f0b9..951112dfe858e 100644 --- a/compiler/rustc_middle/src/ty/vtable.rs +++ b/compiler/rustc_middle/src/ty/vtable.rs @@ -3,7 +3,7 @@ use std::fmt; use rustc_ast::Mutability; use rustc_macros::HashStable; -use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar}; +use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar, CTFE_ALLOC_SALT}; use crate::ty::{self, Instance, PolyTraitRef, Ty, TyCtxt}; #[derive(Clone, Copy, PartialEq, HashStable)] @@ -73,6 +73,11 @@ pub(crate) fn vtable_min_entries<'tcx>( /// Retrieves an allocation that represents the contents of a vtable. /// Since this is a query, allocations are cached and not duplicated. +/// +/// This is an "internal" `AllocId` that should never be used as a value in the interpreted program. +/// The interpreter should use `AllocId` that refer to a `GlobalAlloc::VTable` instead. +/// (This is similar to statics, which also have a similar "internal" `AllocId` storing their +/// initial contents.) pub(super) fn vtable_allocation_provider<'tcx>( tcx: TyCtxt<'tcx>, key: (Ty<'tcx>, Option>), @@ -114,7 +119,7 @@ pub(super) fn vtable_allocation_provider<'tcx>( VtblEntry::MetadataDropInPlace => { if ty.needs_drop(tcx, ty::ParamEnv::reveal_all()) { let instance = ty::Instance::resolve_drop_in_place(tcx, ty); - let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); + let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT); let fn_ptr = Pointer::from(fn_alloc_id); Scalar::from_pointer(fn_ptr, &tcx) } else { @@ -127,7 +132,7 @@ pub(super) fn vtable_allocation_provider<'tcx>( VtblEntry::Method(instance) => { // Prepare the fn ptr we write into the vtable. let instance = instance.polymorphize(tcx); - let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); + let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT); let fn_ptr = Pointer::from(fn_alloc_id); Scalar::from_pointer(fn_ptr, &tcx) } diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs index 10cf545f1b79d..4430aab73a819 100644 --- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs @@ -2,7 +2,9 @@ use rustc_ast as ast; use rustc_hir::LangItem; -use rustc_middle::mir::interpret::{Allocation, LitToConstError, LitToConstInput, Scalar}; +use rustc_middle::mir::interpret::{ + Allocation, LitToConstError, LitToConstInput, Scalar, CTFE_ALLOC_SALT, +}; use rustc_middle::mir::*; use rustc_middle::thir::*; use rustc_middle::ty::{ @@ -140,7 +142,7 @@ fn lit_to_mir_constant<'tcx>( ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } } (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { - let id = tcx.allocate_bytes_dedup(data); + let id = tcx.allocate_bytes_dedup(data, CTFE_ALLOC_SALT); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 2b3ae6df5de8a..966d38508f612 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -56,6 +56,7 @@ extern crate either; extern crate tracing; // The rustc crates we need +extern crate rustc_attr; extern crate rustc_apfloat; extern crate rustc_ast; extern crate rustc_const_eval; diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 3e45a3a7e1a12..df4154bcb5892 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -11,6 +11,7 @@ use rand::rngs::StdRng; use rand::Rng; use rand::SeedableRng; +use rustc_attr::InlineAttr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; #[allow(unused)] use rustc_data_structures::static_assert_size; @@ -47,10 +48,10 @@ pub const SIGRTMIN: i32 = 34; /// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`) pub const SIGRTMAX: i32 = 42; -/// Each const has multiple addresses, but only this many. Since const allocations are never -/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would -/// produce unbounded memory usage. -const ADDRS_PER_CONST: usize = 16; +/// Each anonymous global (constant, vtable, function pointer, ...) has multiple addresses, but only +/// this many. Since const allocations are never deallocated, choosing a new [`AllocId`] and thus +/// base address for each evaluation would produce unbounded memory usage. +const ADDRS_PER_ANON_GLOBAL: usize = 32; /// Extra data stored with each stack frame pub struct FrameExtra<'tcx> { @@ -1372,7 +1373,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { catch_unwind: None, timing, is_user_relevant: ecx.machine.is_user_relevant(&frame), - salt: ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_CONST, + salt: ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL, }; Ok(frame.with_extra(extra)) @@ -1518,4 +1519,40 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { Entry::Occupied(oe) => Ok(oe.get().clone()), } } + + fn get_global_alloc_salt( + ecx: &InterpCx<'tcx, Self>, + instance: Option>, + ) -> usize { + let unique = if let Some(instance) = instance { + // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated + // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be + // duplicated across crates. We thus generate a new `AllocId` for every mention of a + // function. This means that `main as fn() == main as fn()` is false, while `let x = main as + // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify + // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will + // actually emit duplicate functions. It does that when they have non-lifetime generics, or + // when they can be inlined. All other functions are given a unique address. + // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied + // upon for anything. But if we don't do this, backtraces look terrible. + let is_generic = instance + .args + .into_iter() + .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_))); + let can_be_inlined = match ecx.tcx.codegen_fn_attrs(instance.def_id()).inline { + InlineAttr::Never => false, + _ => true, + }; + !is_generic && !can_be_inlined + } else { + // Non-functions are never unique. + false + }; + // Always use the same salt if the allocation is unique. + if unique { + CTFE_ALLOC_SALT + } else { + ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL + } + } } diff --git a/src/tools/miri/tests/pass/dyn-traits.rs b/src/tools/miri/tests/pass/dyn-traits.rs index 908d521a0d816..f6220120968f7 100644 --- a/src/tools/miri/tests/pass/dyn-traits.rs +++ b/src/tools/miri/tests/pass/dyn-traits.rs @@ -141,7 +141,17 @@ fn unsized_dyn_autoderef() { } */ +fn vtable_ptr_eq() { + use std::{fmt, ptr}; + + // We don't always get the same vtable when casting this to a wide pointer. + let x = &2; + let x_wide = x as &dyn fmt::Display; + assert!((0..256).any(|_| !ptr::eq(x as &dyn fmt::Display, x_wide))); +} + fn main() { ref_box_dyn(); box_box_trait(); + vtable_ptr_eq(); } diff --git a/src/tools/miri/tests/pass/function_pointers.rs b/src/tools/miri/tests/pass/function_pointers.rs index 2aa3ebf2dd0b4..a5c4bc5e0d95b 100644 --- a/src/tools/miri/tests/pass/function_pointers.rs +++ b/src/tools/miri/tests/pass/function_pointers.rs @@ -82,7 +82,8 @@ fn main() { assert!(return_fn_ptr(i) == i); assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32); // Miri gives different addresses to different reifications of a generic function. - assert!(return_fn_ptr(f) != f); + // at least if we try often enough. + assert!((0..256).any(|_| return_fn_ptr(f) != f)); // However, if we only turn `f` into a function pointer and use that pointer, // it is equal to itself. let f2 = f as fn() -> i32; diff --git a/src/tools/miri/tests/pass/rc.rs b/src/tools/miri/tests/pass/rc.rs index 6dd1b3aff9e72..b1470dabc26bd 100644 --- a/src/tools/miri/tests/pass/rc.rs +++ b/src/tools/miri/tests/pass/rc.rs @@ -75,7 +75,8 @@ fn rc_fat_ptr_eq() { let p = Rc::new(1) as Rc; let a: *const dyn Debug = &*p; let r = Rc::into_raw(p); - assert!(a == r); + // Only compare the pointer parts, as the vtable might differ. + assert!(a as *const () == r as *const ()); drop(unsafe { Rc::from_raw(r) }); } From 09ae438eb095e6e2a94e1516cb962e66bfc6d747 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Thu, 8 Aug 2024 10:20:26 +0200 Subject: [PATCH 02/14] Add `Steal::is_stolen()` --- compiler/rustc_data_structures/src/steal.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/rustc_data_structures/src/steal.rs b/compiler/rustc_data_structures/src/steal.rs index 9a0fd52677d13..f305a8ad12000 100644 --- a/compiler/rustc_data_structures/src/steal.rs +++ b/compiler/rustc_data_structures/src/steal.rs @@ -51,6 +51,12 @@ impl Steal { let value = value_ref.take(); value.expect("attempt to steal from stolen value") } + + /// Writers of rustc drivers often encounter stealing issues. This function makes it possible to + /// handle these errors gracefully. This is not used within rustc as the time of writing. + pub fn is_stolen(&self) -> bool { + self.value.borrow().is_none() + } } impl> HashStable for Steal { From c966370b1959707be06dd2cbfb1436233fa595a3 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Thu, 8 Aug 2024 21:51:50 +0200 Subject: [PATCH 03/14] Tweak wording Co-authored-by: lcnr --- compiler/rustc_data_structures/src/steal.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_data_structures/src/steal.rs b/compiler/rustc_data_structures/src/steal.rs index f305a8ad12000..0f2c0eee27d2f 100644 --- a/compiler/rustc_data_structures/src/steal.rs +++ b/compiler/rustc_data_structures/src/steal.rs @@ -53,7 +53,10 @@ impl Steal { } /// Writers of rustc drivers often encounter stealing issues. This function makes it possible to - /// handle these errors gracefully. This is not used within rustc as the time of writing. + /// handle these errors gracefully. + /// + /// This should not be used within rustc as it leaks information not tracked + /// by the query system, breaking incremental compilation. pub fn is_stolen(&self) -> bool { self.value.borrow().is_none() } From 11b801bc4e410e77e6935a0c4544d4a90b868214 Mon Sep 17 00:00:00 2001 From: Min <45393763+MinxuanZ@users.noreply.github.com> Date: Thu, 8 Aug 2024 15:36:58 +0800 Subject: [PATCH 04/14] [MIPS] fix the name of signal 19 in mips --- library/std/src/sys/pal/unix/process/process_unix/tests.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/library/std/src/sys/pal/unix/process/process_unix/tests.rs b/library/std/src/sys/pal/unix/process/process_unix/tests.rs index e5e1f956bc351..1ca1ae7f3f45e 100644 --- a/library/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/library/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -24,6 +24,9 @@ fn exitstatus_display_tests() { // The purpose of this test is to test our string formatting, not our understanding of the wait // status magic numbers. So restrict these to Linux. if cfg!(target_os = "linux") { + #[cfg(any(target_arch = "mips", target_arch = "mips64"))] + t(0x0137f, "stopped (not terminated) by signal: 19 (SIGPWR)"); + #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGSTOP)"); t(0x0ffff, "continued (WIFCONTINUED)"); } From a3f8edff20a7d8aadd98d2553bd9ad12508a9980 Mon Sep 17 00:00:00 2001 From: Min <45393763+MinxuanZ@users.noreply.github.com> Date: Thu, 8 Aug 2024 15:53:53 +0800 Subject: [PATCH 05/14] [SPARC] fix the name of signal 19 in sparc arch --- .../std/src/sys/pal/unix/process/process_unix/tests.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/library/std/src/sys/pal/unix/process/process_unix/tests.rs b/library/std/src/sys/pal/unix/process/process_unix/tests.rs index 1ca1ae7f3f45e..f6044c4455c27 100644 --- a/library/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/library/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -26,8 +26,16 @@ fn exitstatus_display_tests() { if cfg!(target_os = "linux") { #[cfg(any(target_arch = "mips", target_arch = "mips64"))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGPWR)"); - #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))] + + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] + t(0x0137f, "stopped (not terminated) by signal: 19 (SIGCONT)"); + + #[cfg(not(any(target_arch = "mips", + target_arch = "sparc", + target_arch = "mips64", + target_arch = "sprac64")))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGSTOP)"); + t(0x0ffff, "continued (WIFCONTINUED)"); } From 625432c837875e40b4167c0a5c7702b2f3c671a0 Mon Sep 17 00:00:00 2001 From: monstercatss Date: Fri, 9 Aug 2024 09:36:22 +0800 Subject: [PATCH 06/14] fix format --- .../std/src/sys/pal/unix/process/process_unix/tests.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/pal/unix/process/process_unix/tests.rs b/library/std/src/sys/pal/unix/process/process_unix/tests.rs index f6044c4455c27..9410b1a87da0a 100644 --- a/library/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/library/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -30,10 +30,12 @@ fn exitstatus_display_tests() { #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGCONT)"); - #[cfg(not(any(target_arch = "mips", - target_arch = "sparc", - target_arch = "mips64", - target_arch = "sprac64")))] + #[cfg(not(any( + target_arch = "mips", + target_arch = "mips64", + target_arch = "sparc", + target_arch = "sparc64" + )))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGSTOP)"); t(0x0ffff, "continued (WIFCONTINUED)"); From 0106f5bcbab8ded01bc4b027d8381d2a3a7cc265 Mon Sep 17 00:00:00 2001 From: monstercatss Date: Fri, 9 Aug 2024 10:12:54 +0800 Subject: [PATCH 07/14] delete space --- library/std/src/sys/pal/unix/process/process_unix/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/pal/unix/process/process_unix/tests.rs b/library/std/src/sys/pal/unix/process/process_unix/tests.rs index 9410b1a87da0a..f4d6ac6b4e340 100644 --- a/library/std/src/sys/pal/unix/process/process_unix/tests.rs +++ b/library/std/src/sys/pal/unix/process/process_unix/tests.rs @@ -31,13 +31,13 @@ fn exitstatus_display_tests() { t(0x0137f, "stopped (not terminated) by signal: 19 (SIGCONT)"); #[cfg(not(any( - target_arch = "mips", + target_arch = "mips", target_arch = "mips64", target_arch = "sparc", target_arch = "sparc64" )))] t(0x0137f, "stopped (not terminated) by signal: 19 (SIGSTOP)"); - + t(0x0ffff, "continued (WIFCONTINUED)"); } From b589f86a09c823208cbe95755f0cb0883e28d0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:03:36 +0000 Subject: [PATCH 08/14] tests: add regression test for incorrect `BytePos` manipulation triggering assertion Issue: --- .../ui/typeck/suggest-arg-comma-delete-ice.rs | 19 ++++++++++ .../suggest-arg-comma-delete-ice.stderr | 38 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/ui/typeck/suggest-arg-comma-delete-ice.rs create mode 100644 tests/ui/typeck/suggest-arg-comma-delete-ice.stderr diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.rs b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs new file mode 100644 index 0000000000000..48d02e13eca09 --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs @@ -0,0 +1,19 @@ +//! Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. +//! One of the edge cases is having to account for an arg that has a closing delimiter `)` +//! following it. However, the previous suggestion code assumed that the delimiter is in fact +//! exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover +//! from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be +//! a multi-byte codepoint that looks *like* a `)`. Subtracing 1 byte could land us in the middle of +//! a codepoint, triggering a codepoint boundary assertion. +//! +//! issue: rust-lang/rust#128717 + +fn main() { + // The following example has been modified from #128717 to remove irrelevant Unicode as they do + // not otherwise partake in the right delimiter calculation causing the codepoint boundary + // assertion. + main(rahh); + //~^ ERROR unknown start of token + //~| ERROR this function takes 0 arguments but 1 argument was supplied + //~| ERROR cannot find value `rahh` in this scope +} diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr new file mode 100644 index 0000000000000..53608391f3c89 --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr @@ -0,0 +1,38 @@ +error: unknown start of token: \u{ff09} + --> $DIR/suggest-arg-comma-delete-ice.rs:15:14 + | +LL | main(rahh); + | ^^ + | +help: Unicode character ')' (Fullwidth Right Parenthesis) looks like ')' (Right Parenthesis), but it is not + | +LL | main(rahh); + | ~ + +error[E0425]: cannot find value `rahh` in this scope + --> $DIR/suggest-arg-comma-delete-ice.rs:15:10 + | +LL | main(rahh); + | ^^^^ not found in this scope + +error[E0061]: this function takes 0 arguments but 1 argument was supplied + --> $DIR/suggest-arg-comma-delete-ice.rs:15:5 + | +LL | main(rahh); + | ^^^^ ---- unexpected argument + | +note: function defined here + --> $DIR/suggest-arg-comma-delete-ice.rs:11:4 + | +LL | fn main() { + | ^^^^ +help: remove the extra argument + | +LL - main(rahh); +LL + main(); + | + +error: aborting due to 3 previous errors + +Some errors have detailed explanations: E0061, E0425. +For more information about an error, try `rustc --explain E0061`. From 879bfd7ad0f5f79e7bc90320dfb80dfabe91ac2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:01:27 +0000 Subject: [PATCH 09/14] hir_typeck: use `end_point` over `BytePos` manipulations Parser has error recovery for Unicode-confusables, which includes the right parentheses `)`. If a multi-byte right parentheses look-alike reaches the argument removal suggestion diagnostics, it would trigger an assertion because the diagnostics used `- BytePos(1)` which can land within a multi-byte codepoint. This is fixed by using `SourceMap::end_point` to find the final right delimiter codepoint, which correctly respects codepoint boundaries. --- compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index cef003e0a43de..89e7227eda2c7 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -22,7 +22,7 @@ use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::Session; use rustc_span::symbol::{kw, Ident}; -use rustc_span::{sym, BytePos, Span, DUMMY_SP}; +use rustc_span::{sym, Span, DUMMY_SP}; use rustc_trait_selection::error_reporting::infer::{FailureCode, ObligationCauseExt}; use rustc_trait_selection::infer::InferCtxtExt; use rustc_trait_selection::traits::{self, ObligationCauseCode, SelectionContext}; @@ -1140,8 +1140,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { .get(arg_idx + 1) .map(|&(_, sp)| sp) .unwrap_or_else(|| { - // Subtract one to move before `)` - call_expr.span.with_lo(call_expr.span.hi() - BytePos(1)) + // Try to move before `)`. Note that `)` here is not necessarily + // the latin right paren, it could be a Unicode-confusable that + // looks like a `)`, so we must not use `- BytePos(1)` + // manipulations here. + self.tcx().sess.source_map().end_point(call_expr.span) }); // Include next comma From 92520a9d4d74d23b29bd65f3372f1d0e5a0ec9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:48:58 +0000 Subject: [PATCH 10/14] tests: add regression test for #128845 For codepoint boundary assertion triggered by a let stmt compound assignment removal suggestion when encountering recovered multi-byte compound ops. Issue: --- .../suggest-remove-compount-assign-let-ice.rs | 16 ++++++++++++ ...gest-remove-compount-assign-let-ice.stderr | 26 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 tests/ui/parser/suggest-remove-compount-assign-let-ice.rs create mode 100644 tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr diff --git a/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs b/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs new file mode 100644 index 0000000000000..1affee5678e44 --- /dev/null +++ b/tests/ui/parser/suggest-remove-compount-assign-let-ice.rs @@ -0,0 +1,16 @@ +//! Previously we would try to issue a suggestion for `let x = 1`, i.e. a compound assignment +//! within a `let` binding, to remove the ``. The suggestion code unfortunately incorrectly +//! assumed that the `` is an exactly-1-byte ASCII character, but this assumption is incorrect +//! because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion +//! code used a `+ BytePos(1)` to calculate the span of the `` codepoint that looks like `-` but +//! the mult-byte Unicode look-alike would cause the suggested removal span to be inside a +//! multi-byte codepoint boundary, triggering a codepoint boundary assertion. +//! +//! issue: rust-lang/rust#128845 + +fn main() { + // Adapted from #128845 but with irrelevant components removed and simplified. + let x ➖= 1; + //~^ ERROR unknown start of token: \u{2796} + //~| ERROR: can't reassign to an uninitialized variable +} diff --git a/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr b/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr new file mode 100644 index 0000000000000..59716d69b50f9 --- /dev/null +++ b/tests/ui/parser/suggest-remove-compount-assign-let-ice.stderr @@ -0,0 +1,26 @@ +error: unknown start of token: \u{2796} + --> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11 + | +LL | let x ➖= 1; + | ^^ + | +help: Unicode character '➖' (Heavy Minus Sign) looks like '-' (Minus/Hyphen), but it is not + | +LL | let x -= 1; + | ~ + +error: can't reassign to an uninitialized variable + --> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11 + | +LL | let x ➖= 1; + | ^^^ + | + = help: if you meant to overwrite, remove the `let` binding +help: initialize the variable + | +LL - let x ➖= 1; +LL + let x = 1; + | + +error: aborting due to 2 previous errors + From d65f1316bba2b3cdccce3f0d7d3e2eb27b5fbe49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:48:52 +0000 Subject: [PATCH 11/14] parser: ensure let stmt compound assignment removal suggestion respect codepoint boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we would try to issue a suggestion for `let x = 1`, i.e. a compound assignment within a `let` binding, to remove the ``. The suggestion code unfortunately incorrectly assumed that the `` is an exactly-1-byte ASCII character, but this assumption is incorrect because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion code used a `+ BytePos(1)` to calculate the span of the `` codepoint that looks like `-` but the mult-byte Unicode look-alike would cause the suggested removal span to be inside a multi-byte codepoint boundary, triggering a codepoint boundary assertion. Issue: --- compiler/rustc_parse/src/parser/stmt.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index b3efb87a4a26a..a3b782d651d3f 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -408,10 +408,14 @@ impl<'a> Parser<'a> { fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option>> { let eq_consumed = match self.token.kind { token::BinOpEq(..) => { - // Recover `let x = 1` as `let x = 1` + // Recover `let x = 1` as `let x = 1` We must not use `+ BytePos(1)` here + // because `` can be a multi-byte lookalike that was recovered, e.g. `➖=` (the + // `➖` is a U+2796 Heavy Minus Sign Unicode Character) that was recovered as a + // `-=`. + let extra_op_span = self.psess.source_map().start_point(self.token.span); self.dcx().emit_err(errors::CompoundAssignmentExpressionInLet { span: self.token.span, - suggestion: self.token.span.with_hi(self.token.span.lo() + BytePos(1)), + suggestion: extra_op_span, }); self.bump(); true From dec5b463fbe506302d7a63fa8c52dfc22b4b110f Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 22:45:42 +0200 Subject: [PATCH 12/14] do not make function addresses unique with cross_crate_inline_threshold=always (even if that breaks backtraces) --- src/tools/miri/src/machine.rs | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index df4154bcb5892..411619ed6b914 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -24,6 +24,7 @@ use rustc_middle::{ Instance, Ty, TyCtxt, }, }; +use rustc_session::config::InliningThreshold; use rustc_span::def_id::{CrateNum, DefId}; use rustc_span::{Span, SpanData, Symbol}; use rustc_target::abi::{Align, Size}; @@ -1525,24 +1526,29 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { instance: Option>, ) -> usize { let unique = if let Some(instance) = instance { - // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated - // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be - // duplicated across crates. We thus generate a new `AllocId` for every mention of a - // function. This means that `main as fn() == main as fn()` is false, while `let x = main as - // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify - // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will - // actually emit duplicate functions. It does that when they have non-lifetime generics, or - // when they can be inlined. All other functions are given a unique address. - // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied - // upon for anything. But if we don't do this, backtraces look terrible. + // Functions cannot be identified by pointers, as asm-equal functions can get + // deduplicated by the linker (we set the "unnamed_addr" attribute for LLVM) and + // functions can be duplicated across crates. We thus generate a new `AllocId` for every + // mention of a function. This means that `main as fn() == main as fn()` is false, while + // `let x = main as fn(); x == x` is true. However, as a quality-of-life feature it can + // be useful to identify certain functions uniquely, e.g. for backtraces. So we identify + // whether codegen will actually emit duplicate functions. It does that when they have + // non-lifetime generics, or when they can be inlined. All other functions are given a + // unique address. This is not a stable guarantee! The `inline` attribute is a hint and + // cannot be relied upon for anything. But if we don't do this, the + // `__rust_begin_short_backtrace`/`__rust_end_short_backtrace` logic breaks and panic + // backtraces look terrible. let is_generic = instance .args .into_iter() .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_))); - let can_be_inlined = match ecx.tcx.codegen_fn_attrs(instance.def_id()).inline { - InlineAttr::Never => false, - _ => true, - }; + let can_be_inlined = matches!( + ecx.tcx.sess.opts.unstable_opts.cross_crate_inline_threshold, + InliningThreshold::Always + ) || !matches!( + ecx.tcx.codegen_fn_attrs(instance.def_id()).inline, + InlineAttr::Never + ); !is_generic && !can_be_inlined } else { // Non-functions are never unique. From f72cb0415bdb6fe3699fb44f9b41da1d34c74189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 9 Aug 2024 12:17:01 +0200 Subject: [PATCH 13/14] Make `Build::run` comment more accurate --- src/bootstrap/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index 453fb39327d63..d3aff8f89a0ca 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -986,7 +986,8 @@ impl Build { } /// Execute a command and return its output. - /// This method should be used for all command executions in bootstrap. + /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to + /// execute commands. They internally call this method. #[track_caller] fn run( &self, From a380d5e8f68de64441274ca2e56dd3a08a60a121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 9 Aug 2024 12:17:22 +0200 Subject: [PATCH 14/14] Do not print verbose error when a bootstrap command fails without verbose mode --- src/bootstrap/src/lib.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index d3aff8f89a0ca..2062d435bfc99 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -1058,20 +1058,28 @@ Executed at: {executed_at}"#, CommandOutput::did_not_start(stdout, stderr) } }; + + let fail = |message: &str| { + if self.is_verbose() { + println!("{message}"); + } else { + println!("Command has failed. Rerun with -v to see more details."); + } + exit!(1); + }; + if !output.is_success() { match command.failure_behavior { BehaviorOnFailure::DelayFail => { if self.fail_fast { - println!("{message}"); - exit!(1); + fail(&message); } let mut failures = self.delayed_failures.borrow_mut(); failures.push(message); } BehaviorOnFailure::Exit => { - println!("{message}"); - exit!(1); + fail(&message); } BehaviorOnFailure::Ignore => { // If failures are allowed, either the error has been printed already