Skip to content

Let codegen decide when to mem::swap with immediates #122582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::traits::*;
use crate::MemFlags;

use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_session::config::OptLevel;
use rustc_span::{sym, Span};
use rustc_target::abi::{
call::{FnAbi, PassMode},
Expand Down Expand Up @@ -76,6 +77,29 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let name = bx.tcx().item_name(def_id);
let name_str = name.as_str();

// If we're swapping something that's *not* an `OperandValue::Ref`,
// then we can do it directly and avoid the alloca.
// Otherwise, we'll let the fallback MIR body take care of it.
if let sym::typed_swap = name {
let pointee_ty = fn_args.type_at(0);
let pointee_layout = bx.layout_of(pointee_ty);
if !bx.is_backend_ref(pointee_layout)
// But if we're not going to optimize, trying to use the fallback
// body just makes things worse, so don't bother.
|| bx.sess().opts.optimize == OptLevel::No
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
// block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
// into the (unoptimized) direct swapping implementation, so we disable it.
|| bx.sess().target.arch == "spirv"
{
let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
bx.typed_place_swap(x_place, y_place);
return Ok(());
}
}

let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);

Expand Down
54 changes: 52 additions & 2 deletions compiler/rustc_codegen_ssa/src/traits/builder.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
use super::abi::AbiBuilderMethods;
use super::asm::AsmBuilderMethods;
use super::consts::ConstMethods;
use super::coverageinfo::CoverageInfoBuilderMethods;
use super::debuginfo::DebugInfoBuilderMethods;
use super::intrinsic::IntrinsicCallMethods;
use super::misc::MiscMethods;
use super::type_::{ArgAbiMethods, BaseTypeMethods};
use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
use super::{HasCodegen, StaticBuilderMethods};

use crate::common::{
AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
};
use crate::mir::operand::OperandRef;
use crate::mir::operand::{OperandRef, OperandValue};
use crate::mir::place::PlaceRef;
use crate::MemFlags;

use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
use rustc_middle::ty::Ty;
use rustc_session::config::OptLevel;
use rustc_span::Span;
use rustc_target::abi::call::FnAbi;
use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
Expand Down Expand Up @@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
flags: MemFlags,
);

/// *Typed* copy for non-overlapping places.
///
/// Has a default implementation in terms of `memcpy`, but specific backends
/// can override to do something smarter if possible.
///
/// (For example, typed load-stores with alias metadata.)
fn typed_place_copy(
&mut self,
dst: PlaceRef<'tcx, Self::Value>,
src: PlaceRef<'tcx, Self::Value>,
) {
debug_assert!(src.llextra.is_none());
debug_assert!(dst.llextra.is_none());
debug_assert_eq!(dst.layout.size, src.layout.size);
if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
// If we're not optimizing, the aliasing information from `memcpy`
// isn't useful, so just load-store the value for smaller code.
let temp = self.load_operand(src);
temp.val.store(self, dst);
} else if !dst.layout.is_zst() {
let bytes = self.const_usize(dst.layout.size.bytes());
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
}
}

/// *Typed* swap for non-overlapping places.
///
/// Avoids `alloca`s for Immediates and ScalarPairs.
///
/// FIXME: Maybe do something smarter for Ref types too?
/// For now, the `typed_swap` intrinsic just doesn't call this for those
/// cases (in non-debug), preferring the fallback body instead.
fn typed_place_swap(
&mut self,
left: PlaceRef<'tcx, Self::Value>,
right: PlaceRef<'tcx, Self::Value>,
) {
let mut temp = self.load_operand(left);
if let OperandValue::Ref(..) = temp.val {
// The SSA value isn't stand-alone, so we need to copy it elsewhere
let alloca = PlaceRef::alloca(self, left.layout);
self.typed_place_copy(alloca, left);
temp = self.load_operand(alloca);
}
self.typed_place_copy(left, right);
temp.val.store(self, right);
}

fn select(
&mut self,
cond: Self::Value,
Expand Down
14 changes: 14 additions & 0 deletions compiler/rustc_codegen_ssa/src/traits/type_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
immediate: bool,
) -> Self::Type;

/// A type that produces an [`OperandValue::Ref`] when loaded.
///
/// AKA one that's not a ZST, not `is_backend_immediate`, and
/// not `is_backend_scalar_pair`. For such a type, a
/// [`load_operand`] doesn't actually `load` anything.
///
/// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
/// [`load_operand`]: super::BuilderMethods::load_operand
fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
!(layout.is_zst()
|| self.is_backend_immediate(layout)
|| self.is_backend_scalar_pair(layout))
}

/// A type that can be used in a [`super::BuilderMethods::load`] +
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
/// such as a MIR `*_0 = *_1`.
Expand Down
25 changes: 23 additions & 2 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use rustc_span::symbol::{sym, Symbol};
use rustc_target::abi::Size;

use super::{
util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, MPlaceTy, Machine, OpTy,
Pointer,
memory::MemoryKind, util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx,
MPlaceTy, Machine, OpTy, Pointer,
};

use crate::fluent_generated as fluent;
Expand Down Expand Up @@ -415,6 +415,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
self.write_scalar(result, dest)?;
}
sym::typed_swap => {
self.typed_swap_intrinsic(&args[0], &args[1])?;
}

sym::vtable_size => {
let ptr = self.read_pointer(&args[0])?;
Expand Down Expand Up @@ -608,6 +611,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
self.mem_copy(src, dst, size, nonoverlapping)
}

/// Does a *typed* swap of `*left` and `*right`.
fn typed_swap_intrinsic(
&mut self,
left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
) -> InterpResult<'tcx> {
let left = self.deref_pointer(left)?;
let right = self.deref_pointer(right)?;
debug_assert_eq!(left.layout, right.layout);
let kind = MemoryKind::Stack;
let temp = self.allocate(left.layout, kind)?;
self.copy_op(&left, &temp)?;
self.copy_op(&right, &left)?;
self.copy_op(&temp, &right)?;
self.deallocate_ptr(temp.ptr(), None, kind)?;
Ok(())
}

pub(crate) fn write_bytes_intrinsic(
&mut self,
dst: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ pub fn check_intrinsic_type(
(1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
}

sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),

sym::discriminant_value => {
let assoc_items = tcx.associated_item_def_ids(
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,7 @@ symbols! {
type_macros,
type_name,
type_privacy_lints,
typed_swap,
u128,
u128_legacy_const_max,
u128_legacy_const_min,
Expand Down
22 changes: 22 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
use crate::marker::DiscriminantKind;
use crate::marker::Tuple;
use crate::mem::align_of;
use crate::ptr;

pub mod mir;
pub mod simd;
Expand Down Expand Up @@ -2628,6 +2629,27 @@ pub const fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
false
}

/// Non-overlapping *typed* swap of a single value.
///
/// The codegen backends will replace this with a better implementation when
/// `T` is a simple type that can be loaded and stored as an immediate.
///
/// The stabilized form of this intrinsic is [`crate::mem::swap`].
///
/// # Safety
///
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
#[rustc_nounwind]
#[inline]
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
// This has fallback `const fn` MIR, so shouldn't need stability, see #122652
#[rustc_const_unstable(feature = "const_typed_swap", issue = "none")]
pub const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
// SAFETY: The caller provided single non-overlapping items behind
// pointers, so swapping them with `count: 1` is fine.
unsafe { ptr::swap_nonoverlapping(x, y, 1) };
}

/// Returns whether we should check for library UB. This evaluate to the value of `cfg!(debug_assertions)`
/// during monomorphization.
///
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
#![feature(const_try)]
#![feature(const_type_id)]
#![feature(const_type_name)]
#![feature(const_typed_swap)]
#![feature(const_unicode_case_lookup)]
#![feature(const_unsafecell_get_mut)]
#![feature(const_waker)]
Expand Down
60 changes: 3 additions & 57 deletions library/core/src/mem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,63 +726,9 @@ pub unsafe fn uninitialized<T>() -> T {
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
#[rustc_diagnostic_item = "mem_swap"]
pub const fn swap<T>(x: &mut T, y: &mut T) {
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
// block optimization in `swap_slice` is hard to rewrite back
// into the (unoptimized) direct swapping implementation, so we disable it.
#[cfg(not(any(target_arch = "spirv")))]
{
// For types that are larger multiples of their alignment, the simple way
// tends to copy the whole thing to stack rather than doing it one part
// at a time, so instead treat them as one-element slices and piggy-back
// the slice optimizations that will split up the swaps.
if const { size_of::<T>() / align_of::<T>() > 2 } {
// SAFETY: exclusive references always point to one non-overlapping
// element and are non-null and properly aligned.
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
}
}

// If a scalar consists of just a small number of alignment units, let
// the codegen just swap those pieces directly, as it's likely just a
// few instructions and anything else is probably overcomplicated.
//
// Most importantly, this covers primitives and simd types that tend to
// have size=align where doing anything else can be a pessimization.
// (This will also be used for ZSTs, though any solution works for them.)
swap_simple(x, y);
}

/// Same as [`swap`] semantically, but always uses the simple implementation.
///
/// Used elsewhere in `mem` and `ptr` at the bottom layer of calls.
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
#[inline]
pub(crate) const fn swap_simple<T>(x: &mut T, y: &mut T) {
// We arrange for this to typically be called with small types,
// so this reads-and-writes approach is actually better than using
// copy_nonoverlapping as it easily puts things in LLVM registers
// directly and doesn't end up inlining allocas.
// And LLVM actually optimizes it to 3×memcpy if called with
// a type larger than it's willing to keep in a register.
// Having typed reads and writes in MIR here is also good as
// it lets Miri and CTFE understand them better, including things
// like enforcing type validity for them.
// Importantly, read+copy_nonoverlapping+write introduces confusing
// asymmetry to the behaviour where one value went through read+write
// whereas the other was copied over by the intrinsic (see #94371).
// Furthermore, using only read+write here benefits limited backends
// such as SPIR-V that work on an underlying *typed* view of memory,
// and thus have trouble with Rust's untyped memory operations.

// SAFETY: exclusive references are always valid to read/write,
// including being aligned, and nothing here panics so it's drop-safe.
unsafe {
let a = ptr::read(x);
let b = ptr::read(y);
ptr::write(x, b);
ptr::write(y, a);
}
// SAFETY: `&mut` guarantees these are typed readable and writable
// as well as non-overlapping.
unsafe { intrinsics::typed_swap(x, y) }
}

/// Replaces `dest` with the default value of `T`, returning the previous `dest` value.
Expand Down
21 changes: 18 additions & 3 deletions library/core/src/ptr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1062,11 +1062,26 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
let mut i = 0;
while i < count {
// SAFETY: By precondition, `i` is in-bounds because it's below `n`
let x = unsafe { &mut *x.add(i) };
let x = unsafe { x.add(i) };
// SAFETY: By precondition, `i` is in-bounds because it's below `n`
// and it's distinct from `x` since the ranges are non-overlapping
let y = unsafe { &mut *y.add(i) };
mem::swap_simple::<MaybeUninit<T>>(x, y);
let y = unsafe { y.add(i) };

// If we end up here, it's because we're using a simple type -- like
// a small power-of-two-sized thing -- or a special type with particularly
// large alignment, particularly SIMD types.
// Thus we're fine just reading-and-writing it, as either it's small
// and that works well anyway or it's special and the type's author
// presumably wanted things to be done in the larger chunk.

// SAFETY: we're only ever given pointers that are valid to read/write,
// including being aligned, and nothing here panics so it's drop-safe.
unsafe {
let a: MaybeUninit<T> = read(x);
let b: MaybeUninit<T> = read(y);
write(x, b);
write(y, a);
}

i += 1;
}
Expand Down
19 changes: 19 additions & 0 deletions src/tools/miri/tests/fail/intrinsics/typed-swap-invalid-array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#![feature(core_intrinsics)]
#![feature(rustc_attrs)]

use std::intrinsics::typed_swap;
use std::ptr::addr_of_mut;

fn invalid_array() {
let mut a = [1_u8; 100];
let mut b = [2_u8; 100];
unsafe {
let a = addr_of_mut!(a).cast::<[bool; 100]>();
let b = addr_of_mut!(b).cast::<[bool; 100]>();
typed_swap(a, b); //~ERROR: constructing invalid value
}
}

fn main() {
invalid_array();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
error: Undefined Behavior: constructing invalid value at [0]: encountered 0x02, but expected a boolean
--> $DIR/typed-swap-invalid-array.rs:LL:CC
|
LL | typed_swap(a, b);
| ^^^^^^^^^^^^^^^^ constructing invalid value at [0]: encountered 0x02, but expected a boolean
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `invalid_array` at $DIR/typed-swap-invalid-array.rs:LL:CC
note: inside `main`
--> $DIR/typed-swap-invalid-array.rs:LL:CC
|
LL | invalid_array();
| ^^^^^^^^^^^^^^^

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

Loading
Loading