Skip to content

Commit 8d0d26e

Browse files
committed
nontemporal_store: make sure that the intrinsic is truly just a hint
1 parent 2ccafed commit 8d0d26e

File tree

5 files changed

+47
-15
lines changed

5 files changed

+47
-15
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>(
725725

726726
// Cranelift treats stores as volatile by default
727727
// FIXME correctly handle unaligned_volatile_store
728-
// FIXME actually do nontemporal stores if requested
728+
// FIXME actually do nontemporal stores if requested (but see the LLVM backend for which
729+
// architectures even allow this in a sane way)
729730
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
730731
dest.write_cvalue(fx, val);
731732
}

compiler/rustc_codegen_gcc/src/builder.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
11281128
self.llbb().add_assignment(self.location, aligned_destination, val);
11291129
// TODO(antoyo): handle align and flags.
11301130
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
1131+
// When adding support for NONTEMPORAL, see the LLVM backend for which
1132+
// architectures even allow this in a sane way.
11311133
self.cx.context.new_rvalue_zero(self.type_i32())
11321134
}
11331135

compiler/rustc_codegen_llvm/src/builder.rs

+21-7
Original file line numberDiff line numberDiff line change
@@ -725,13 +725,27 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
725725
llvm::LLVMSetVolatile(store, llvm::True);
726726
}
727727
if flags.contains(MemFlags::NONTEMPORAL) {
728-
// According to LLVM [1] building a nontemporal store must
729-
// *always* point to a metadata value of the integer 1.
730-
//
731-
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
732-
let one = self.cx.const_i32(1);
733-
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
734-
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
728+
// Make sure that the current target architecture supports "sane" non-temporal
729+
// stores, i.e., non-temporal stores that are equivalent to regular stores except
730+
// for performance. LLVM doesn't seem to care about this, and will happily treat
731+
// `!nontemporal` stores as-if they were normal stores (for reordering optimizations
732+
// etc) even on x86, despite later lowering them to MOVNT, which does *not* behave like
733+
// regular stores but require special fences.
734+
// So we keep a list of architectures where `!nontemporal` is known to be truly just
735+
// a hint, and use regular stores everywhere else.
736+
const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] = &["aarch64", "arm"];
737+
738+
let use_nontemporal =
739+
WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch);
740+
if use_nontemporal {
741+
// According to LLVM [1] building a nontemporal store must
742+
// *always* point to a metadata value of the integer 1.
743+
//
744+
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
745+
let one = self.cx.const_i32(1);
746+
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
747+
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
748+
}
735749
}
736750
store
737751
}

library/core/src/intrinsics.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -2405,12 +2405,12 @@ extern "rust-intrinsic" {
24052405
#[rustc_nounwind]
24062406
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;
24072407

2408-
/// Emits a `!nontemporal` store according to LLVM (see their docs).
2409-
/// Probably will never become stable.
2408+
/// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held
2409+
/// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`.
24102410
///
2411-
/// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model!
2412-
/// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic.
2413-
/// See <https://github.com/rust-lang/rust/issues/114582> for some more discussion.
2411+
/// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT`
2412+
/// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered
2413+
/// in ways that are not allowed for regular writes).
24142414
#[rustc_nounwind]
24152415
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
24162416

+17-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,28 @@
11
//@ compile-flags: -O
2+
//@ compile-flags: --target aarch64-unknown-linux-gnu
3+
//@ needs-llvm-components: aarch64
24

3-
#![feature(core_intrinsics)]
5+
#![feature(no_core, lang_items, intrinsics)]
6+
#![no_core]
47
#![crate_type = "lib"]
58

9+
#[lang = "sized"]
10+
pub trait Sized {}
11+
#[lang = "copy"]
12+
pub trait Copy {}
13+
14+
impl Copy for u32 {}
15+
impl<T> Copy for *mut T {}
16+
17+
extern "rust-intrinsic" {
18+
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
19+
}
20+
621
#[no_mangle]
722
pub fn a(a: &mut u32, b: u32) {
823
// CHECK-LABEL: define{{.*}}void @a
924
// CHECK: store i32 %b, ptr %a, align 4, !nontemporal
1025
unsafe {
11-
std::intrinsics::nontemporal_store(a, b);
26+
nontemporal_store(a, b);
1227
}
1328
}

0 commit comments

Comments
 (0)