diff --git a/tests/assembly/simd-intrinsic-mask-load.rs b/tests/assembly/simd-intrinsic-mask-load.rs
index b80af1141cc5..e94ca9b382a8 100644
--- a/tests/assembly/simd-intrinsic-mask-load.rs
+++ b/tests/assembly/simd-intrinsic-mask-load.rs
@@ -1,10 +1,11 @@
-// verify that simd mask reductions do not introduce additional bit shift operations
-//@ revisions: x86 aarch64
-//@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
-//@ [x86] needs-llvm-components: x86
-//@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
-//@ [aarch64] needs-llvm-components: aarch64
-//@ [aarch64] min-llvm-version: 15.0
+// verify that simd masked load does not introduce additional bit shift operations
+//@ revisions: x86-avx x86-avx512
+//@ [x86-avx] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
+//@ [x86-avx] compile-flags: -C target-feature=+avx
+//@ [x86-avx] needs-llvm-components: x86
+//@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
+//@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
+//@ [x86-avx512] needs-llvm-components: x86
 //@ assembly-output: emit-asm
 //@ compile-flags: --crate-type=lib -O
 
@@ -20,29 +21,43 @@ pub trait Sized {}
 trait Copy {}
 
 #[repr(simd)]
-pub struct mask8x16([i8; 16]);
+pub struct f32x8([f32; 8]);
+
+#[repr(simd)]
+pub struct m32x8([i32; 8]);
+
+#[repr(simd)]
+pub struct f64x4([f64; 4]);
+
+#[repr(simd)]
+pub struct m64x4([i64; 4]);
 
 extern "rust-intrinsic" {
-    fn simd_reduce_all<T>(x: T) -> bool;
-    fn simd_reduce_any<T>(x: T) -> bool;
+    fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
 }
 
-// CHECK-LABEL: mask_reduce_all:
+// CHECK-LABEL: load_f32x8
 #[no_mangle]
-pub unsafe fn mask_reduce_all(m: mask8x16) -> bool {
-    // x86: movdqa
-    // x86-NEXT: pmovmskb
-    // aarch64: cmge
-    // aarch64-NEXT: umaxv
-    simd_reduce_all(m)
+pub unsafe fn load_f32x8(mask: m32x8, pointer: *const f32, output: *mut f32x8) {
+    // x86-avx-NOT: vpslld
+    // x86-avx: vmovaps ymm0
+    // x86-avx-NEXT: vmaskmovps
+    // x86-avx512-NOT: vpslld
+    // x86-avx512: vpcmpgtd k1
+    // x86-avx512-NEXT: vmovups ymm0 {k1} {z}
+    // x86-avx512-NEXT: vmovaps
+    *output = simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))
 }
 
-// CHECK-LABEL: mask_reduce_any:
+// CHECK-LABEL: load_f64x4
 #[no_mangle]
-pub unsafe fn mask_reduce_any(m: mask8x16) -> bool {
-    // x86: movdqa
-    // x86-NEXT: pmovmskb
-    // aarch64: cmlt
-    // aarch64-NEXT: umaxv
-    simd_reduce_any(m)
+pub unsafe fn load_f64x4(mask: m64x4, pointer: *const f64, output: *mut f64x4) {
+    // x86-avx-NOT: vpsllq
+    // x86-avx: vmovapd
+    // x86-avx-NEXT: vmaskmovpd ymm0
+    // x86-avx512-NOT: vpsllq
+    // x86-avx512: vpcmpgtq k1
+    // x86-avx512-NEXT: vmovupd ymm0 {k1} {z}
+    // x86-avx512-NEXT: vmovapd
+    *output = simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
 }
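
For readers unfamiliar with the intrinsic under test, the sketch below is a scalar reference model of the lane semantics that `simd_masked_load` is expected to have, assuming the usual mask convention for these `m32x8`/`m64x4` mask types (an all-ones, i.e. negative, lane loads from memory; a zero lane keeps the passthrough value). The `masked_load_ref` helper is hypothetical and exists only for illustration; it is not part of the test or of rustc.

```rust
// Hypothetical scalar reference model (not part of the test): it mirrors the
// lane semantics the FileCheck patterns above rely on, assuming an all-ones
// (negative) mask lane means "load from memory" and a zero lane means "keep
// the passthrough value".
unsafe fn masked_load_ref(mask: &[i32; 8], pointer: *const f32, passthrough: [f32; 8]) -> [f32; 8] {
    let mut out = passthrough;
    for i in 0..8 {
        if mask[i] < 0 {
            // Only enabled lanes touch memory; disabled lanes never read it.
            out[i] = unsafe { *pointer.add(i) };
        }
    }
    out
}

fn main() {
    let data = [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let mask = [-1, 0, -1, 0, -1, 0, -1, 0]; // load even lanes, passthrough odd lanes
    let loaded = unsafe { masked_load_ref(&mask, data.as_ptr(), [0.0; 8]) };
    assert_eq!(loaded, [1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0]);
}
```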