Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More aggressive inlining in the fast_reject code. #137760

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion compiler/rustc_trait_selection/src/traits/coherence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ pub fn overlapping_impls(
let impl1_ref = tcx.impl_trait_ref(impl1_def_id);
let impl2_ref = tcx.impl_trait_ref(impl2_def_id);
let may_overlap = match (impl1_ref, impl2_ref) {
(Some(a), Some(b)) => drcx.args_may_unify(a.skip_binder().args, b.skip_binder().args),
(Some(a), Some(b)) => {
// This call site can be hot.
drcx.inlined_args_may_unify(a.skip_binder().args, b.skip_binder().args)
}
(None, None) => {
let self_ty1 = tcx.type_of(impl1_def_id).skip_binder();
let self_ty2 = tcx.type_of(impl2_def_id).skip_binder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -561,9 +561,11 @@ impl<'cx, 'tcx> SelectionContext<'cx, 'tcx> {
// consider a "quick reject". This avoids creating more types
// and so forth that we need to.
let impl_trait_header = self.tcx().impl_trait_header(impl_def_id).unwrap();
if !drcx
.args_may_unify(obligation_args, impl_trait_header.trait_ref.skip_binder().args)
{
// This call site can be hot.
if !drcx.inlined_args_may_unify(
obligation_args,
impl_trait_header.trait_ref.skip_binder().args,
) {
return;
}

Expand Down
70 changes: 54 additions & 16 deletions compiler/rustc_type_ir/src/fast_reject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,15 @@ impl<I: Interner, const INSTANTIATE_LHS_WITH_INFER: bool, const INSTANTIATE_RHS_
// and small enough to prevent hangs.
const STARTING_DEPTH: usize = 8;

#[inline(always)]
pub fn inlined_args_may_unify(
self,
obligation_args: I::GenericArgs,
impl_args: I::GenericArgs,
) -> bool {
self.inlined_args_may_unify_inner(obligation_args, impl_args, Self::STARTING_DEPTH)
}

pub fn args_may_unify(
self,
obligation_args: I::GenericArgs,
Expand All @@ -228,32 +237,59 @@ impl<I: Interner, const INSTANTIATE_LHS_WITH_INFER: bool, const INSTANTIATE_RHS_
self.args_may_unify_inner(obligation_args, impl_args, Self::STARTING_DEPTH)
}

pub fn types_may_unify(self, lhs: I::Ty, rhs: I::Ty) -> bool {
self.types_may_unify_inner(lhs, rhs, Self::STARTING_DEPTH)
#[inline(always)]
fn inlined_arg_may_unify(self, obl: I::GenericArg, imp: I::GenericArg, depth: usize) -> bool {
match (obl.kind(), imp.kind()) {
// We don't fast reject based on regions.
(ty::GenericArgKind::Lifetime(_), ty::GenericArgKind::Lifetime(_)) => true,
(ty::GenericArgKind::Type(obl), ty::GenericArgKind::Type(imp)) => {
self.types_may_unify_inner(obl, imp, depth)
}
(ty::GenericArgKind::Const(obl), ty::GenericArgKind::Const(imp)) => {
self.consts_may_unify_inner(obl, imp)
}
_ => panic!("kind mismatch: {obl:?} {imp:?}"),
}
}

fn arg_may_unify(self, obl: I::GenericArg, imp: I::GenericArg, depth: usize) -> bool {
self.inlined_arg_may_unify(obl, imp, depth)
}

fn args_may_unify_inner(
self,
obligation_args: I::GenericArgs,
impl_args: I::GenericArgs,
depth: usize,
) -> bool {
iter::zip(obligation_args.iter(), impl_args.iter())
.all(|(obl, imp)| self.arg_may_unify(obl, imp, depth))
}

#[inline(always)]
fn inlined_args_may_unify_inner(
self,
obligation_args: I::GenericArgs,
impl_args: I::GenericArgs,
depth: usize,
) -> bool {
// No need to decrement the depth here as this function is only
// recursively reachable via `types_may_unify_inner` which already
// increments the depth for us.
iter::zip(obligation_args.iter(), impl_args.iter()).all(|(obl, imp)| {
match (obl.kind(), imp.kind()) {
// We don't fast reject based on regions.
(ty::GenericArgKind::Lifetime(_), ty::GenericArgKind::Lifetime(_)) => true,
(ty::GenericArgKind::Type(obl), ty::GenericArgKind::Type(imp)) => {
self.types_may_unify_inner(obl, imp, depth)
}
(ty::GenericArgKind::Const(obl), ty::GenericArgKind::Const(imp)) => {
self.consts_may_unify_inner(obl, imp)
}
_ => panic!("kind mismatch: {obl:?} {imp:?}"),
}
})

// The len==1 case accounts for 99.9% of occurrences in the benchmarks
// where this code is hot, e.g. `bitmaps-3.1.0` and `typenum-1.17.0`.
if obligation_args.len() == 1 {
let obl = obligation_args.as_slice()[0];
let imp = impl_args.as_slice()[0];
self.inlined_arg_may_unify(obl, imp, depth)
} else {
self.args_may_unify_inner(obligation_args, impl_args, depth)
}
}

pub fn types_may_unify(self, lhs: I::Ty, rhs: I::Ty) -> bool {
self.types_may_unify_inner(lhs, rhs, Self::STARTING_DEPTH)
}

fn types_may_unify_inner(self, lhs: I::Ty, rhs: I::Ty, depth: usize) -> bool {
Expand Down Expand Up @@ -320,7 +356,9 @@ impl<I: Interner, const INSTANTIATE_LHS_WITH_INFER: bool, const INSTANTIATE_RHS_

ty::Adt(lhs_def, lhs_args) => match rhs.kind() {
ty::Adt(rhs_def, rhs_args) => {
lhs_def == rhs_def && self.args_may_unify_inner(lhs_args, rhs_args, depth)
// This call site can be hot.
lhs_def == rhs_def
&& self.inlined_args_may_unify_inner(lhs_args, rhs_args, depth)
}
_ => false,
Comment on lines 358 to 363
Copy link
Contributor

@lcnr lcnr Mar 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the perf impact of using

            ty::Adt(lhs_def, lhs_args) => match rhs.kind() {
                ty::Adt(rhs_def, rhs_args) if lhs_def == rhs_def => if lhs_args.len() == 1 {
                    // This code is very hot and a lot of generic ADTs have
                    // just a single generic argument.
                    self.inlined_arg_may_unify(lhs_args.as_slice()[0], rhs_args.as_slice()[0], depth)
                } else
                    self.args_may_unify_inner(lhs_args, rhs_args, depth)
                },
                _ => false,
            },

and not adding inlined_args_may_unify_inner?

I don't like call-sites using inlined_args_may_unify if we can avoid it/it doesn't have a meaningful perf impact to not do so 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This call is hot, but the two calls to inlined_args_may_unify are also hot. So only having the len==1 optimization at this call site would reduce its impact.

},
Expand Down
Loading