Skip to content

Commit 7c26420

Browse files
authored
Unrolled build for rust-lang#128149
Rollup merge of rust-lang#128149 - RalfJung:nontemporal_store, r=jieyouxu,Amanieu,Jubilee nontemporal_store: make sure that the intrinsic is truly just a hint The `!nontemporal` flag for stores in LLVM *sounds* like it is just a hint, but actually, it is not -- at least on x86, non-temporal stores need very special treatment by the programmer or else the Rust memory model breaks down. LLVM still treats these stores as-if they were normal stores for optimizations, which is [highly dubious](llvm/llvm-project#64521). Let's avoid all that dubiousness by making our own non-temporal stores be truly just a hint, which is possible on some targets (e.g. ARM). On all other targets, non-temporal stores become regular stores. ~~Blocked on rust-lang/stdarch#1541 propagating to the rustc repo, to make sure the `_mm_stream` intrinsics are unaffected by this change.~~ Fixes rust-lang#114582 Cc `@Amanieu` `@workingjubilee`
2 parents e08b80c + 75743dc commit 7c26420

File tree

5 files changed

+62
-16
lines changed

5 files changed

+62
-16
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>(
725725

726726
// Cranelift treats stores as volatile by default
727727
// FIXME correctly handle unaligned_volatile_store
728-
// FIXME actually do nontemporal stores if requested
728+
// FIXME actually do nontemporal stores if requested (but do not just emit MOVNT on x86;
729+
// see the LLVM backend for details)
729730
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
730731
dest.write_cvalue(fx, val);
731732
}

compiler/rustc_codegen_gcc/src/builder.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
11271127
self.llbb().add_assignment(self.location, aligned_destination, val);
11281128
// TODO(antoyo): handle align and flags.
11291129
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
1130+
// When adding support for NONTEMPORAL, make sure to not just emit MOVNT on x86; see the
1131+
// LLVM backend for details.
11301132
self.cx.context.new_rvalue_zero(self.type_i32())
11311133
}
11321134

compiler/rustc_codegen_llvm/src/builder.rs

+26-7
Original file line numberDiff line numberDiff line change
@@ -728,13 +728,32 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
728728
llvm::LLVMSetVolatile(store, llvm::True);
729729
}
730730
if flags.contains(MemFlags::NONTEMPORAL) {
731-
// According to LLVM [1] building a nontemporal store must
732-
// *always* point to a metadata value of the integer 1.
733-
//
734-
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
735-
let one = self.cx.const_i32(1);
736-
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
737-
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
731+
// Make sure that the current target architecture supports "sane" non-temporal
732+
// stores, i.e., non-temporal stores that are equivalent to regular stores except
733+
// for performance. LLVM doesn't seem to care about this, and will happily treat
734+
// `!nontemporal` stores as-if they were normal stores (for reordering optimizations
735+
// etc) even on x86, despite later lowering them to MOVNT which does *not* behave like
736+
// regular stores but require special fences.
737+
// So we keep a list of architectures where `!nontemporal` is known to be truly just
738+
// a hint, and use regular stores everywhere else.
739+
// (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
740+
// before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
741+
// For more context, see <https://github.com/rust-lang/rust/issues/114582> and
742+
// <https://github.com/llvm/llvm-project/issues/64521>.
743+
const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] =
744+
&["aarch64", "arm", "riscv32", "riscv64"];
745+
746+
let use_nontemporal =
747+
WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch);
748+
if use_nontemporal {
749+
// According to LLVM [1] building a nontemporal store must
750+
// *always* point to a metadata value of the integer 1.
751+
//
752+
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
753+
let one = self.cx.const_i32(1);
754+
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
755+
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
756+
}
738757
}
739758
store
740759
}

library/core/src/intrinsics.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -2675,12 +2675,12 @@ extern "rust-intrinsic" {
26752675
#[rustc_nounwind]
26762676
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;
26772677

2678-
/// Emits a `!nontemporal` store according to LLVM (see their docs).
2679-
/// Probably will never become stable.
2678+
/// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held
2679+
/// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`.
26802680
///
2681-
/// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model!
2682-
/// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic.
2683-
/// See <https://github.com/rust-lang/rust/issues/114582> for some more discussion.
2681+
/// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT`
2682+
/// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered
2683+
/// in ways that are not allowed for regular writes).
26842684
#[rustc_nounwind]
26852685
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
26862686

+27-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,37 @@
11
//@ compile-flags: -O
2+
//@revisions: with_nontemporal without_nontemporal
3+
//@[with_nontemporal] compile-flags: --target aarch64-unknown-linux-gnu
4+
//@[with_nontemporal] needs-llvm-components: aarch64
5+
//@[without_nontemporal] compile-flags: --target x86_64-unknown-linux-gnu
6+
//@[without_nontemporal] needs-llvm-components: x86
27

3-
#![feature(core_intrinsics)]
8+
// Ensure that we *do* emit the `!nontemporal` flag on architectures where it
9+
// is well-behaved, but do *not* emit it on architectures where it is ill-behaved.
10+
// For more context, see <https://github.com/rust-lang/rust/issues/114582> and
11+
// <https://github.com/llvm/llvm-project/issues/64521>.
12+
13+
#![feature(no_core, lang_items, intrinsics)]
14+
#![no_core]
415
#![crate_type = "lib"]
516

17+
#[lang = "sized"]
18+
pub trait Sized {}
19+
#[lang = "copy"]
20+
pub trait Copy {}
21+
22+
impl Copy for u32 {}
23+
impl<T> Copy for *mut T {}
24+
25+
extern "rust-intrinsic" {
26+
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
27+
}
28+
629
#[no_mangle]
730
pub fn a(a: &mut u32, b: u32) {
831
// CHECK-LABEL: define{{.*}}void @a
9-
// CHECK: store i32 %b, ptr %a, align 4, !nontemporal
32+
// with_nontemporal: store i32 %b, ptr %a, align 4, !nontemporal
33+
// without_nontemporal-NOT: nontemporal
1034
unsafe {
11-
std::intrinsics::nontemporal_store(a, b);
35+
nontemporal_store(a, b);
1236
}
1337
}

0 commit comments

Comments
 (0)