Skip to content

Commit fe53a81

Browse files
committed
rustc: Add support for some more x86 SIMD ops
This commit adds compiler support for two basic operations needed for binding SIMD on x86 platforms:

* First, a `nontemporal_store` intrinsic was added for the `_mm_stream_ps` intrinsic, seen in rust-lang/stdarch#114. This was relatively straightforward and is quite similar to the volatile store intrinsic.
* Next, and much more intrusively, a new type was added to the backend. The `x86_mmx` type is used in LLVM for a 64-bit vector register and is used in various intrinsics like `_mm_abs_pi8` as seen in rust-lang/stdarch#74. This new type was added as a new layout option as well as having support added to the trans backend. The type is enabled with the `#[repr(x86_mmx)]` attribute which is intended to just be an implementation detail of SIMD in Rust.

I'm not 100% certain about how the `x86_mmx` type was added, so any extra eyes or thoughts on that would be greatly appreciated!
1 parent cc6b88c commit fe53a81

File tree

10 files changed

+125
-4
lines changed

10 files changed

+125
-4
lines changed

src/libcore/intrinsics.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1387,4 +1387,9 @@ extern "rust-intrinsic" {
13871387
/// # } }
13881388
/// ```
13891389
pub fn align_offset(ptr: *const (), align: usize) -> usize;
1390+
1391+
/// Writes `val` to `ptr` using a store tagged `!nontemporal`; see the `store` instruction in the LLVM Language Reference for what that metadata means.
1392+
/// Probably will never become stable.
1393+
#[cfg(not(stage0))]
1394+
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
13901395
}

src/librustc_llvm/ffi.rs

+1
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ extern "C" {
587587

588588
// Operations on other types
589589
pub fn LLVMVoidTypeInContext(C: ContextRef) -> TypeRef;
590+
pub fn LLVMX86MMXTypeInContext(C: ContextRef) -> TypeRef;
590591
pub fn LLVMRustMetadataTypeInContext(C: ContextRef) -> TypeRef;
591592

592593
// Operations on all values

src/librustc_trans/builder.rs

+23
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,29 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
612612
}
613613
}
614614

615+
/// Builds a store of `val` to `ptr` and tags it with `!nontemporal` metadata.
///
/// The destination pointer is first passed through `check_store`. The returned
/// value is the store instruction itself, so a caller can adjust it further
/// (for example, to set its alignment).
pub fn nontemporal_store(&self, val: ValueRef, ptr: ValueRef) -> ValueRef {
616+
debug!("Store {:?} -> {:?}", Value(val), Value(ptr));
617+
assert!(!self.llbuilder.is_null());
618+
self.count_insn("store.nontemporal");
619+
let ptr = self.check_store(val, ptr);
620+
unsafe {
621+
let insn = llvm::LLVMBuildStore(self.llbuilder, val, ptr);
622+
623+
// According to LLVM [1] the `!nontemporal` metadata attached to a store
624+
// must *always* be a node holding the single integer 1. Who knew?
625+
//
626+
// [1]: http://llvm.org/docs/LangRef.html#store-instruction
627+
let one = C_i32(self.ccx, 1);
628+
let node = llvm::LLVMMDNodeInContext(self.ccx.llcx(),
629+
&one,
630+
1);
631+
llvm::LLVMSetMetadata(insn,
632+
llvm::MD_nontemporal as c_uint,
633+
node);
634+
insn
635+
}
636+
}
637+
615638
pub fn gep(&self, ptr: ValueRef, indices: &[ValueRef]) -> ValueRef {
616639
self.count_insn("gep");
617640
unsafe {

src/librustc_trans/diagnostics.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ The generic type has to be a SIMD type. Example:
3737
3838
#[repr(simd)]
3939
#[derive(Copy, Clone)]
40-
struct i32x1(i32);
40+
struct i32x2(i32, i32);
4141
4242
extern "platform-intrinsic" {
4343
fn simd_add<T>(a: T, b: T) -> T;
4444
}
4545
46-
unsafe { simd_add(i32x1(0), i32x1(1)); } // ok!
46+
unsafe { simd_add(i32x2(0, 0), i32x2(1, 2)); } // ok!
4747
```
4848
"##,
4949

src/librustc_trans/intrinsic.rs

+16
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,22 @@ pub fn trans_intrinsic_call<'a, 'tcx>(bcx: &Builder<'a, 'tcx>,
540540
}
541541
}
542542

543+
"nontemporal_store" => {
544+
let tp_ty = substs.type_at(0);
545+
let dst = args[0].deref(bcx.ccx);
546+
let val = if let OperandValue::Ref(ptr, align) = args[1].val {
547+
bcx.load(ptr, align.non_abi())
548+
} else {
549+
from_immediate(bcx, args[1].immediate())
550+
};
551+
let ptr = bcx.pointercast(dst.llval, val_ty(val).ptr_to());
552+
let store = bcx.nontemporal_store(val, ptr);
553+
unsafe {
554+
llvm::LLVMSetAlignment(store, ccx.align_of(tp_ty).abi() as u32);
555+
}
556+
return
557+
}
558+
543559
_ => {
544560
let intr = match Intrinsic::find(&name) {
545561
Some(intr) => intr,

src/librustc_trans/type_.rs

+4
Original file line numberDiff line numberDiff line change
@@ -286,4 +286,8 @@ impl Type {
286286
Type::i8(ccx)
287287
}
288288
}
289+
290+
/// Returns the LLVM `x86_mmx` type, obtained from the LLVM context of `ccx`.
pub fn x86_mmx(ccx: &CrateContext) -> Type {
291+
ty!(llvm::LLVMX86MMXTypeInContext(ccx.llcx()))
292+
}
289293
}

src/librustc_trans/type_of.rs

+17-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,23 @@ fn uncached_llvm_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
2626
match layout.abi {
2727
layout::Abi::Scalar(_) => bug!("handled elsewhere"),
2828
layout::Abi::Vector => {
29-
return Type::vector(&layout.field(ccx, 0).llvm_type(ccx),
30-
layout.fields.count() as u64);
29+
// LLVM has a separate type for 64-bit SIMD vectors on X86 called
30+
// `x86_mmx` which is needed for some SIMD operations. As a bit of a
31+
// hack (all SIMD definitions are super unstable anyway) we
32+
// recognize any one-element SIMD vector as "this should be an
33+
// x86_mmx" type. In general there shouldn't be a need for other
34+
// one-element SIMD vectors, so it's assumed this won't clash with
35+
// much else.
36+
let use_x86_mmx = layout.fields.count() == 1 &&
37+
layout.size.bits() == 64 &&
38+
(ccx.sess().target.target.arch == "x86" ||
39+
ccx.sess().target.target.arch == "x86_64");
40+
if use_x86_mmx {
41+
return Type::x86_mmx(ccx)
42+
} else {
43+
return Type::vector(&layout.field(ccx, 0).llvm_type(ccx),
44+
layout.fields.count() as u64);
45+
}
3146
}
3247
layout::Abi::ScalarPair(..) => {
3348
return Type::struct_(ccx, &[

src/librustc_typeck/check/intrinsic.rs

+4
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,10 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
318318
(0, vec![ptr_ty, tcx.types.usize], tcx.types.usize)
319319
},
320320

321+
"nontemporal_store" => {
322+
(1, vec![ tcx.mk_mut_ptr(param(0)), param(0) ], tcx.mk_nil())
323+
}
324+
321325
ref other => {
322326
struct_span_err!(tcx.sess, it.span, E0093,
323327
"unrecognized intrinsic function: `{}`",

src/test/codegen/nontemporal.rs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// compile-flags: -O
12+
13+
#![feature(core_intrinsics)]
14+
#![crate_type = "lib"]
15+
16+
// Codegen test: calling the `nontemporal_store` intrinsic is expected to
// produce a `store` instruction carrying `!nontemporal` metadata, as matched
// by the FileCheck directives inside the body.
#[no_mangle]
17+
pub fn a(a: &mut u32, b: u32) {
18+
// CHECK-LABEL: define void @a
19+
// CHECK: store i32 %b, i32* %a, align 4, !nontemporal
20+
unsafe {
21+
std::intrinsics::nontemporal_store(a, b);
22+
}
23+
}

src/test/codegen/x86_mmx.rs

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// ignore-arm
12+
// ignore-aarch64
13+
// ignore-emscripten
14+
// compile-flags: -O
15+
16+
#![feature(repr_simd)]
17+
#![crate_type="lib"]
18+
19+
// A `#[repr(simd)]` struct with a single 64-bit field. The FileCheck
// directives in this file expect values of this type to appear as `x86_mmx`
// in the generated IR.
#[repr(simd)]
20+
#[derive(Clone, Copy)]
21+
pub struct i8x8(u64);
22+
23+
// Stores `b` through `a` and then returns `b`. The FileCheck directives
// require the function signature, the store and the return to all use the
// `x86_mmx` type.
#[no_mangle]
24+
pub fn a(a: &mut i8x8, b: i8x8) -> i8x8 {
25+
// CHECK-LABEL: define x86_mmx @a(x86_mmx*{{.*}}, x86_mmx{{.*}})
26+
// CHECK: store x86_mmx %b, x86_mmx* %a
27+
// CHECK: ret x86_mmx %b
28+
*a = b;
29+
return b
30+
}

0 commit comments

Comments
 (0)