Skip to content

Commit 3d8cc34

Browse files
committed
Auto merge of rust-lang#123572 - Mark-Simulacrum:vtable-methods, r=<try>
Increase vtable layout size

This improves LLVM's codegen by allowing vtable loads to be hoisted out of loops (as just one example). The calculation here is an under-approximation but works for simple trait hierarchies (e.g., FnMut will be improved). We have a runtime assert that the approximation is accurate, so there's no risk of UB as a result of getting this wrong.

```rust
#[no_mangle]
pub fn foo(elements: &[u32], callback: &mut dyn Callback) {
    for element in elements.iter() {
        if *element != 0 {
            callback.call(*element);
        }
    }
}

pub trait Callback {
    fn call(&mut self, _: u32);
}
```

Simplifying a bit (e.g., numbering ends up different):

```diff
 ; Function Attrs: nonlazybind uwtable
-define void @foo(ptr noalias noundef nonnull readonly align 4 %elements.0, i64 noundef %elements.1, ptr noundef nonnull align 1 %callback.0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %callback.1) unnamed_addr #0 {
+define void @foo(ptr noalias noundef nonnull readonly align 4 %elements.0, i64 noundef %elements.1, ptr noundef nonnull align 1 %callback.0, ptr noalias nocapture noundef readonly align 8 dereferenceable(32) %callback.1) unnamed_addr #0 {
 start:
   %_15 = getelementptr inbounds i32, ptr %elements.0, i64 %elements.1
@@ -13,4 +13,5 @@
 bb4.lr.ph:                                        ; preds = %start
   %1 = getelementptr inbounds i8, ptr %callback.1, i64 24
+  %2 = load ptr, ptr %1, align 8, !nonnull !3
   br label %bb4

 bb6:                                              ; preds = %bb4
-  %4 = load ptr, ptr %1, align 8, !invariant.load !3, !nonnull !3
-  tail call void %4(ptr noundef nonnull align 1 %callback.0, i32 noundef %_9)
+  tail call void %2(ptr noundef nonnull align 1 %callback.0, i32 noundef %_9)
   br label %bb7
 }
```
2 parents 0e5f520 + 26954e7 commit 3d8cc34

File tree

3 files changed

+77
-71
lines changed

3 files changed

+77
-71
lines changed

compiler/rustc_middle/src/ty/layout.rs

+7-18
Original file line numberDiff line numberDiff line change
@@ -771,25 +771,14 @@ where
771771
});
772772
}
773773

774-
let mk_dyn_vtable = || {
774+
let mk_dyn_vtable = |principal: Option<ty::PolyExistentialTraitRef<'tcx>>| {
775+
let min_count = ty::vtable_min_entries(tcx, principal);
775776
Ty::new_imm_ref(
776777
tcx,
777778
tcx.lifetimes.re_static,
778-
Ty::new_array(tcx, tcx.types.usize, 3),
779+
// FIXME: properly type (e.g. usize and fn pointers) the fields.
780+
Ty::new_array(tcx, tcx.types.usize, min_count.try_into().unwrap()),
779781
)
780-
/* FIXME: use actual fn pointers
781-
Warning: naively computing the number of entries in the
782-
vtable by counting the methods on the trait + methods on
783-
all parent traits does not work, because some methods can
784-
be not object safe and thus excluded from the vtable.
785-
Increase this counter if you tried to implement this but
786-
failed to do it without duplicating a lot of code from
787-
other places in the compiler: 2
788-
Ty::new_tup(tcx,&[
789-
Ty::new_array(tcx,tcx.types.usize, 3),
790-
Ty::new_array(tcx,Option<fn()>),
791-
])
792-
*/
793782
};
794783

795784
let metadata = if let Some(metadata_def_id) = tcx.lang_items().metadata_type()
@@ -808,16 +797,16 @@ where
808797
// `std::mem::uninitialized::<&dyn Trait>()`, for example.
809798
if let ty::Adt(def, args) = metadata.kind()
810799
&& Some(def.did()) == tcx.lang_items().dyn_metadata()
811-
&& args.type_at(0).is_trait()
800+
&& let ty::Dynamic(data, _, ty::Dyn) = args.type_at(0).kind()
812801
{
813-
mk_dyn_vtable()
802+
mk_dyn_vtable(data.principal())
814803
} else {
815804
metadata
816805
}
817806
} else {
818807
match tcx.struct_tail_erasing_lifetimes(pointee, cx.param_env()).kind() {
819808
ty::Slice(_) | ty::Str => tcx.types.usize,
820-
ty::Dynamic(_, _, ty::Dyn) => mk_dyn_vtable(),
809+
ty::Dynamic(data, _, ty::Dyn) => mk_dyn_vtable(data.principal()),
821810
_ => bug!("TyAndLayout::field({:?}): not applicable", this),
822811
}
823812
};

compiler/rustc_middle/src/ty/vtable.rs

+64
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ use std::fmt;
33
use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar};
44
use crate::ty::{self, Instance, PolyTraitRef, Ty, TyCtxt};
55
use rustc_ast::Mutability;
6+
use rustc_data_structures::fx::FxHashSet;
7+
use rustc_hir::def_id::DefId;
68

79
#[derive(Clone, Copy, PartialEq, HashStable)]
810
pub enum VtblEntry<'tcx> {
@@ -45,6 +47,65 @@ pub const COMMON_VTABLE_ENTRIES_DROPINPLACE: usize = 0;
4547
pub const COMMON_VTABLE_ENTRIES_SIZE: usize = 1;
4648
pub const COMMON_VTABLE_ENTRIES_ALIGN: usize = 2;
4749

50+
// FIXME: This is duplicating equivalent code in compiler/rustc_trait_selection/src/traits/util.rs
51+
// But that is a downstream crate, and this code is pretty simple. Probably OK for now.
52+
/// Iterator state for walking a trait and the supertraits reachable from it, yielding each
/// trait's `DefId`.
struct SupertraitDefIds<'tcx> {
53+
// Used by `next` to query `super_predicates_of` for each trait that is yielded.
tcx: TyCtxt<'tcx>,
54+
// Traits that have been discovered but not yet yielded; `next` pops from the end.
stack: Vec<DefId>,
55+
// Every `DefId` that has been queued on `stack` so far, so that no trait is queued twice.
visited: FxHashSet<DefId>,
56+
}
57+
58+
/// Builds a `SupertraitDefIds` iterator rooted at `trait_def_id`.
///
/// The root trait is the only element initially on the stack, so it is the first item the
/// iterator yields. It is also recorded as visited up front, so it is not queued again if it
/// shows up among the super predicates examined later.
fn supertrait_def_ids(tcx: TyCtxt<'_>, trait_def_id: DefId) -> SupertraitDefIds<'_> {
59+
SupertraitDefIds {
60+
tcx,
61+
stack: vec![trait_def_id],
62+
// A one-element set containing just the root trait.
visited: Some(trait_def_id).into_iter().collect(),
63+
}
64+
}
65+
66+
/// Yields the `DefId` of each queued trait, queueing that trait's not-yet-seen supertraits
/// as it goes.
impl Iterator for SupertraitDefIds<'_> {
67+
type Item = DefId;
68+
69+
/// Pops the most recently queued trait, queues the traits named by its super predicates
/// that have not been seen before, and returns the popped `DefId`. Returns `None` once
/// the stack is empty.
fn next(&mut self) -> Option<DefId> {
70+
// An empty stack means the walk is finished; `?` returns `None` in that case.
let def_id = self.stack.pop()?;
71+
let predicates = self.tcx.super_predicates_of(def_id);
72+
// Borrow `visited` on its own so the closure below can mutate it while `self.stack`
// is being extended.
let visited = &mut self.visited;
73+
self.stack.extend(
74+
predicates
75+
.predicates
76+
.iter()
77+
// Keep only the predicates that are trait clauses.
.filter_map(|(pred, _)| pred.as_trait_clause())
78+
.map(|trait_ref| trait_ref.def_id())
79+
// `insert` returns `true` only for ids not already in the set, so each trait is
// queued at most once.
.filter(|&super_def_id| visited.insert(super_def_id)),
80+
);
81+
Some(def_id)
82+
}
83+
}
84+
85+
// Note that we don't have access to a self type here, this has to be purely based on the trait (and
86+
// supertrait) definitions. That means we can't call into the same vtable_entries code since that
87+
// returns a specific instantiation (e.g., with Vacant slots when bounds aren't satisfied). The goal
88+
// here is to do a best-effort approximation without duplicating a lot of code.
89+
//
90+
// This function is used in layout computation for e.g. &dyn Trait, so it's critical that this
91+
// function never reports more entries than the real vtable has (it must be a lower bound). We verify this when actually computing the vtable below.
92+
/// Computes an entry count for a trait object's vtable from trait definitions alone.
///
/// The result is the number of `TyCtxt::COMMON_VTABLE_ENTRIES` plus, for the principal trait
/// and every trait yielded by `supertrait_def_ids` for it, the number of entries returned by
/// `own_existential_vtable_entries`. When `trait_ref` is `None` only the common entries are
/// counted.
pub(crate) fn vtable_min_entries<'tcx>(
93+
tcx: TyCtxt<'tcx>,
94+
trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
95+
) -> usize {
96+
// Start from the entries counted for every vtable, with or without a principal trait.
let mut count = TyCtxt::COMMON_VTABLE_ENTRIES.len();
97+
// Without a principal trait there are no trait definitions to walk.
let Some(trait_ref) = trait_ref else {
98+
return count;
99+
};
100+
101+
// This includes self in supertraits.
102+
for def_id in supertrait_def_ids(tcx, trait_ref.def_id()) {
103+
count += tcx.own_existential_vtable_entries(def_id).len();
104+
}
105+
106+
count
107+
}
108+
48109
/// Retrieves an allocation that represents the contents of a vtable.
49110
/// Since this is a query, allocations are cached and not duplicated.
50111
pub(super) fn vtable_allocation_provider<'tcx>(
@@ -62,6 +123,9 @@ pub(super) fn vtable_allocation_provider<'tcx>(
62123
TyCtxt::COMMON_VTABLE_ENTRIES
63124
};
64125

126+
// This confirms that the layout computation for &dyn Trait did not assume more entries than the vtable really has.
127+
assert!(vtable_entries.len() >= vtable_min_entries(tcx, poly_trait_ref));
128+
65129
let layout = tcx
66130
.layout_of(ty::ParamEnv::reveal_all().and(ty))
67131
.expect("failed to build vtable representation");

compiler/rustc_trait_selection/src/traits/object_safety.rs

+6-53
Original file line numberDiff line numberDiff line change
@@ -497,59 +497,12 @@ fn virtual_call_violations_for_method<'tcx>(
497497
};
498498
errors.push(MethodViolationCode::UndispatchableReceiver(span));
499499
} else {
500-
// Do sanity check to make sure the receiver actually has the layout of a pointer.
501-
502-
use rustc_target::abi::Abi;
503-
504-
let param_env = tcx.param_env(method.def_id);
505-
506-
let abi_of_ty = |ty: Ty<'tcx>| -> Option<Abi> {
507-
match tcx.layout_of(param_env.and(ty)) {
508-
Ok(layout) => Some(layout.abi),
509-
Err(err) => {
510-
// #78372
511-
tcx.dcx().span_delayed_bug(
512-
tcx.def_span(method.def_id),
513-
format!("error: {err}\n while computing layout for type {ty:?}"),
514-
);
515-
None
516-
}
517-
}
518-
};
519-
520-
// e.g., `Rc<()>`
521-
let unit_receiver_ty =
522-
receiver_for_self_ty(tcx, receiver_ty, Ty::new_unit(tcx), method.def_id);
523-
524-
match abi_of_ty(unit_receiver_ty) {
525-
Some(Abi::Scalar(..)) => (),
526-
abi => {
527-
tcx.dcx().span_delayed_bug(
528-
tcx.def_span(method.def_id),
529-
format!(
530-
"receiver when `Self = ()` should have a Scalar ABI; found {abi:?}"
531-
),
532-
);
533-
}
534-
}
535-
536-
let trait_object_ty = object_ty_for_trait(tcx, trait_def_id, tcx.lifetimes.re_static);
537-
538-
// e.g., `Rc<dyn Trait>`
539-
let trait_object_receiver =
540-
receiver_for_self_ty(tcx, receiver_ty, trait_object_ty, method.def_id);
541-
542-
match abi_of_ty(trait_object_receiver) {
543-
Some(Abi::ScalarPair(..)) => (),
544-
abi => {
545-
tcx.dcx().span_delayed_bug(
546-
tcx.def_span(method.def_id),
547-
format!(
548-
"receiver when `Self = {trait_object_ty}` should have a ScalarPair ABI; found {abi:?}"
549-
),
550-
);
551-
}
552-
}
500+
// We used to have a sanity check here for whether the ABI of the receiver with `()` and `dyn Trait`
501+
// self types was correct. For now that code has been deleted since
502+
// computing the layout_of such types requires knowing the number of methods in the
503+
// virtual table, which in turn requires this code. So we skip these checks. Both
504+
// of these are just sanity checking (i.e. this code is not responsible for ABI) so this
505+
// should be fine.
553506
}
554507
}
555508

0 commit comments

Comments
 (0)