From 107a38f5d7882a974d3e33eeded1a670988c9fb2 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 11 May 2021 11:13:25 +0200 Subject: [PATCH 1/5] manually const-ify shuffle arguments This means we do not rely on promotion any more for these arguments --- .../core_arch/src/aarch64/neon/generated.rs | 90 ++--- crates/core_arch/src/aarch64/neon/mod.rs | 28 +- crates/core_arch/src/arm_shared/neon/mod.rs | 208 +++++----- crates/core_arch/src/macros.rs | 120 ++++++ crates/core_arch/src/powerpc/vsx.rs | 8 +- crates/core_arch/src/x86/avx.rs | 94 ++--- crates/core_arch/src/x86/avx2.rs | 159 ++++---- crates/core_arch/src/x86/avx512bw.rs | 90 ++--- crates/core_arch/src/x86/avx512f.rs | 356 +++++++++--------- crates/core_arch/src/x86/sse.rs | 28 +- crates/core_arch/src/x86/sse2.rs | 54 +-- crates/core_arch/src/x86/sse3.rs | 6 +- crates/core_arch/src/x86/sse41.rs | 24 +- crates/core_arch/src/x86/ssse3.rs | 4 +- crates/stdarch-gen/neon.spec | 90 ++--- 15 files changed, 740 insertions(+), 619 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 3a39d5f845..c02b59be7f 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -40,8 +40,8 @@ pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabdl))] pub unsafe fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - let c: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); simd_cast(vabd_u8(c, d)) } @@ -50,8 +50,8 @@ pub unsafe fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabdl))] pub unsafe fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - let c: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let d: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); simd_cast(vabd_u16(c, d)) } @@ -60,8 +60,8 @@ pub unsafe fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabdl))] pub unsafe fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - let c: uint32x2_t = simd_shuffle2(a, a, [2, 3]); - let d: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); + let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); simd_cast(vabd_u32(c, d)) } @@ -70,8 +70,8 @@ pub unsafe fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabdl))] pub unsafe fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - let c: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let e: uint8x8_t = simd_cast(vabd_s8(c, d)); simd_cast(e) } @@ -81,8 +81,8 @@ pub unsafe fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabdl))] pub 
unsafe fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let c: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let d: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let e: uint16x4_t = simd_cast(vabd_s16(c, d)); simd_cast(e) } @@ -92,8 +92,8 @@ pub unsafe fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabdl))] pub unsafe fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let c: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let d: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]); let e: uint32x2_t = simd_cast(vabd_s32(c, d)); simd_cast(e) } @@ -2077,7 +2077,7 @@ pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtl))] pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t { - let b: float32x2_t = simd_shuffle2(a, a, [2, 3]); + let b: float32x2_t = simd_shuffle2!(a, a, [2, 3]); simd_cast(b) } @@ -2094,7 +2094,7 @@ pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtn))] pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - simd_shuffle4(a, simd_cast(b), [0, 1, 2, 3]) + simd_shuffle4!(a, simd_cast(b), [0, 1, 2, 3]) } /// Floating-point convert to lower precision narrow, rounding to odd @@ -2115,7 +2115,7 @@ pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtxn))] pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) } /// Fixed-point convert to floating-point @@ -5167,7 +5167,7 @@ pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubw))] pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - let c: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); simd_sub(a, simd_cast(c)) } @@ -5176,7 +5176,7 @@ pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubw))] pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - let c: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); simd_sub(a, simd_cast(c)) } @@ -5185,7 +5185,7 @@ pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubw))] pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - let c: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle2!(b, b, [2, 3]); simd_sub(a, simd_cast(c)) } @@ -5194,7 +5194,7 @@ pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubw))] pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let c: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); simd_sub(a, simd_cast(c)) } @@ 
-5203,7 +5203,7 @@ pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubw))] pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let c: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); simd_sub(a, simd_cast(c)) } @@ -5212,7 +5212,7 @@ pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubw))] pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let c: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); simd_sub(a, simd_cast(c)) } @@ -5221,9 +5221,9 @@ pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubl))] pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - let c: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); let d: int16x8_t = simd_cast(c); - let e: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let f: int16x8_t = simd_cast(e); simd_sub(d, f) } @@ -5233,9 +5233,9 @@ pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubl))] pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let c: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); let d: int32x4_t = simd_cast(c); - let e: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let e: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let f: int32x4_t = simd_cast(e); simd_sub(d, f) } @@ -5245,9 +5245,9 @@ pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ssubl))] pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let c: int32x2_t = simd_shuffle2(a, a, [2, 3]); + let c: int32x2_t = simd_shuffle2!(a, a, [2, 3]); let d: int64x2_t = simd_cast(c); - let e: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let e: int32x2_t = simd_shuffle2!(b, b, [2, 3]); let f: int64x2_t = simd_cast(e); simd_sub(d, f) } @@ -5257,9 +5257,9 @@ pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubl))] pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - let c: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); let d: uint16x8_t = simd_cast(c); - let e: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let f: uint16x8_t = simd_cast(e); simd_sub(d, f) } @@ -5269,9 +5269,9 @@ pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubl))] pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - let c: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); let d: uint32x4_t = simd_cast(c); - let e: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let e: uint16x4_t = 
simd_shuffle4!(b, b, [4, 5, 6, 7]); let f: uint32x4_t = simd_cast(e); simd_sub(d, f) } @@ -5281,9 +5281,9 @@ pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(usubl))] pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - let c: uint32x2_t = simd_shuffle2(a, a, [2, 3]); + let c: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); let d: uint64x2_t = simd_cast(c); - let e: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let e: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); let f: uint64x2_t = simd_cast(e); simd_sub(d, f) } @@ -8805,8 +8805,8 @@ pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabal))] pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let d: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); let f: uint8x8_t = vabd_u8(d, e); simd_add(a, simd_cast(f)) } @@ -8816,8 +8816,8 @@ pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabal))] pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let d: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let e: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let d: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let e: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); let f: uint16x4_t = vabd_u16(d, e); simd_add(a, simd_cast(f)) } @@ -8827,8 +8827,8 @@ pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uin #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uabal))] pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let d: uint32x2_t = simd_shuffle2(b, b, [2, 3]); - let e: uint32x2_t = simd_shuffle2(c, c, [2, 3]); + let d: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let e: uint32x2_t = simd_shuffle2!(c, c, [2, 3]); let f: uint32x2_t = vabd_u32(d, e); simd_add(a, simd_cast(f)) } @@ -8838,8 +8838,8 @@ pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabal))] pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let d: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let e: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); let f: int8x8_t = vabd_s8(d, e); let f: uint8x8_t = simd_cast(f); simd_add(a, simd_cast(f)) @@ -8850,8 +8850,8 @@ pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8 #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabal))] pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let d: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let e: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let d: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let e: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); let f: int16x4_t = vabd_s16(d, e); let f: uint16x4_t = simd_cast(f); 
simd_add(a, simd_cast(f)) @@ -8862,8 +8862,8 @@ pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sabal))] pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let d: int32x2_t = simd_shuffle2(b, b, [2, 3]); - let e: int32x2_t = simd_shuffle2(c, c, [2, 3]); + let d: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let e: int32x2_t = simd_shuffle2!(c, c, [2, 3]); let f: int32x2_t = vabd_s32(d, e); let f: uint32x2_t = simd_cast(f); simd_add(a, simd_cast(f)) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 2af220dace..cc1e3f0626 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -1595,7 +1595,7 @@ pub unsafe fn vext_f64(a: float64x1_t, _b: float64x1_t) -> float64 #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t { - simd_shuffle16( + simd_shuffle16!( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], @@ -1607,7 +1607,7 @@ pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t { - simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine @@ -1615,7 +1615,7 @@ pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t { - simd_shuffle4(low, high, [0, 1, 2, 3]) + simd_shuffle4!(low, high, [0, 1, 2, 3]) } /// Vector combine @@ -1623,7 +1623,7 @@ pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t { - simd_shuffle2(low, high, [0, 1]) + simd_shuffle2!(low, high, [0, 1]) } /// Vector combine @@ -1631,7 +1631,7 @@ pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t { - simd_shuffle16( + simd_shuffle16!( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], @@ -1643,7 +1643,7 @@ pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t { - simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine @@ -1651,7 +1651,7 @@ pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t { - simd_shuffle4(low, high, [0, 1, 2, 3]) + simd_shuffle4!(low, high, [0, 1, 2, 3]) } /// Vector combine @@ -1659,7 +1659,7 @@ pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t { - simd_shuffle2(low, 
high, [0, 1]) + simd_shuffle2!(low, high, [0, 1]) } /// Vector combine @@ -1667,7 +1667,7 @@ pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t { - simd_shuffle2(low, high, [0, 1]) + simd_shuffle2!(low, high, [0, 1]) } /// Duplicate vector element to vector or scalar @@ -1772,7 +1772,7 @@ pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_t { - simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } */ @@ -1781,7 +1781,7 @@ pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_ #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t { - simd_shuffle4(low, high, [0, 1, 2, 3]) + simd_shuffle4!(low, high, [0, 1, 2, 3]) } /// Vector combine @@ -1789,7 +1789,7 @@ pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t { - simd_shuffle16( + simd_shuffle16!( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], @@ -1801,7 +1801,7 @@ pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t { - simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine @@ -1809,7 +1809,7 @@ pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t { - simd_shuffle2(low, high, [0, 1]) + simd_shuffle2!(low, high, [0, 1]) } /// Table look-up diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 3c87862166..f7e97e7a05 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -580,7 +580,7 @@ pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { let x = vld1_lane_s8::<0>(ptr, transmute(i8x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -591,7 +591,7 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { let x = vld1q_lane_s8::<0>(ptr, transmute(i8x16::splat(0))); - simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). 
@@ -602,7 +602,7 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { let x = vld1_lane_s16::<0>(ptr, transmute(i16x4::splat(0))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -613,7 +613,7 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { let x = vld1q_lane_s16::<0>(ptr, transmute(i16x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -624,7 +624,7 @@ pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { let x = vld1_lane_s32::<0>(ptr, transmute(i32x2::splat(0))); - simd_shuffle2(x, x, [0, 0]) + simd_shuffle2!(x, x, [0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -635,7 +635,7 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { let x = vld1q_lane_s32::<0>(ptr, transmute(i32x4::splat(0))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -663,7 +663,7 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { let x = vld1q_lane_s64::<0>(ptr, transmute(i64x2::splat(0))); - simd_shuffle2(x, x, [0, 0]) + simd_shuffle2!(x, x, [0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -674,7 +674,7 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { let x = vld1_lane_u8::<0>(ptr, transmute(u8x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -685,7 +685,7 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { let x = vld1q_lane_u8::<0>(ptr, transmute(u8x16::splat(0))); - simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -696,7 +696,7 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { let x = vld1_lane_u16::<0>(ptr, transmute(u16x4::splat(0))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). 
@@ -707,7 +707,7 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { let x = vld1q_lane_u16::<0>(ptr, transmute(u16x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -718,7 +718,7 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { let x = vld1_lane_u32::<0>(ptr, transmute(u32x2::splat(0))); - simd_shuffle2(x, x, [0, 0]) + simd_shuffle2!(x, x, [0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -729,7 +729,7 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { let x = vld1q_lane_u32::<0>(ptr, transmute(u32x4::splat(0))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -757,7 +757,7 @@ pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { let x = vld1q_lane_u64::<0>(ptr, transmute(u64x2::splat(0))); - simd_shuffle2(x, x, [0, 0]) + simd_shuffle2!(x, x, [0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -768,7 +768,7 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { let x = vld1_lane_p8::<0>(ptr, transmute(u8x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -779,7 +779,7 @@ pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { let x = vld1q_lane_p8::<0>(ptr, transmute(u8x16::splat(0))); - simd_shuffle16(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -790,7 +790,7 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { let x = vld1_lane_p16::<0>(ptr, transmute(u16x4::splat(0))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -801,7 +801,7 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { let x = vld1q_lane_p16::<0>(ptr, transmute(u16x8::splat(0))); - simd_shuffle8(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). 
@@ -812,7 +812,7 @@ pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { let x = vld1_lane_f32::<0>(ptr, transmute(f32x2::splat(0.))); - simd_shuffle2(x, x, [0, 0]) + simd_shuffle2!(x, x, [0, 0]) } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -823,7 +823,7 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { let x = vld1q_lane_f32::<0>(ptr, transmute(f32x4::splat(0.))); - simd_shuffle4(x, x, [0, 0, 0, 0]) + simd_shuffle4!(x, x, [0, 0, 0, 0]) } // signed absolute difference and accumulate (64-bit) @@ -1284,8 +1284,8 @@ pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] pub unsafe fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - let a: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let a: int16x8_t = simd_cast(a); let b: int16x8_t = simd_cast(b); simd_add(a, b) @@ -1298,8 +1298,8 @@ pub unsafe fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] pub unsafe fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let a: int32x4_t = simd_cast(a); let b: int32x4_t = simd_cast(b); simd_add(a, b) @@ -1312,8 +1312,8 @@ pub unsafe fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] pub unsafe fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); let a: int64x2_t = simd_cast(a); let b: int64x2_t = simd_cast(b); simd_add(a, b) @@ -1326,8 +1326,8 @@ pub unsafe fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))] pub unsafe fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - let a: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let a: uint16x8_t = simd_cast(a); let b: uint16x8_t = simd_cast(b); simd_add(a, b) @@ -1340,8 +1340,8 @@ pub unsafe fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(uaddl2))] pub unsafe fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - let a: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let a: uint32x4_t = simd_cast(a); let b: uint32x4_t = simd_cast(b); simd_add(a, b) @@ -1354,8 +1354,8 @@ pub unsafe fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))] pub unsafe fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - let a: uint32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); let a: uint64x2_t = simd_cast(a); let b: uint64x2_t = simd_cast(b); simd_add(a, b) @@ -1434,7 +1434,7 @@ pub unsafe fn vaddw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] pub unsafe fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let b: int16x8_t = simd_cast(b); simd_add(a, b) } @@ -1446,7 +1446,7 @@ pub unsafe fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] pub unsafe fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let b: int32x4_t = simd_cast(b); simd_add(a, b) } @@ -1458,7 +1458,7 @@ pub unsafe fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] pub unsafe fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); let b: int64x2_t = simd_cast(b); simd_add(a, b) } @@ -1470,7 +1470,7 @@ pub unsafe fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] pub unsafe fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); let b: uint16x8_t = simd_cast(b); simd_add(a, b) } @@ -1482,7 +1482,7 @@ pub unsafe fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] pub unsafe fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); let b: uint32x4_t = simd_cast(b); simd_add(a, b) } @@ -1494,7 +1494,7 @@ pub unsafe fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { #[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vaddw))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] pub unsafe fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); let b: uint64x2_t = simd_cast(b); simd_add(a, b) } @@ -1567,7 +1567,7 @@ pub unsafe fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t(8, 8, 8, 8, 8, 8, 8, 8))); - simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Add returning High Narrow (high half). @@ -1578,7 +1578,7 @@ pub unsafe fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x1 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t(16, 16, 16, 16))); - simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Add returning High Narrow (high half). @@ -1589,7 +1589,7 @@ pub unsafe fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t(32, 32))); - simd_shuffle4(r, x, [0, 1, 2, 3]) + simd_shuffle4!(r, x, [0, 1, 2, 3]) } /// Add returning High Narrow (high half). @@ -1600,7 +1600,7 @@ pub unsafe fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t(8, 8, 8, 8, 8, 8, 8, 8))); - simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Add returning High Narrow (high half). @@ -1611,7 +1611,7 @@ pub unsafe fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uin #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t(16, 16, 16, 16))); - simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Add returning High Narrow (high half). @@ -1622,7 +1622,7 @@ pub unsafe fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> ui #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] pub unsafe fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t(32, 32))); - simd_shuffle4(r, x, [0, 1, 2, 3]) + simd_shuffle4!(r, x, [0, 1, 2, 3]) } /// Rounding Add returning High Narrow. 
@@ -1693,7 +1693,7 @@ pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { let x = vraddhn_s16_(a, b); - simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Rounding Add returning High Narrow (high half). @@ -1704,7 +1704,7 @@ pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { let x = vraddhn_s32_(a, b); - simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Rounding Add returning High Narrow (high half). @@ -1715,7 +1715,7 @@ pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int1 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { let x = vraddhn_s64_(a, b); - simd_shuffle4(r, x, [0, 1, 2, 3]) + simd_shuffle4!(r, x, [0, 1, 2, 3]) } /// Rounding Add returning High Narrow (high half). @@ -1726,7 +1726,7 @@ pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int3 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { let x: uint8x8_t = transmute(vraddhn_s16_(transmute(a), transmute(b))); - simd_shuffle16(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Rounding Add returning High Narrow (high half). @@ -1737,7 +1737,7 @@ pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> ui #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { let x: uint16x4_t = transmute(vraddhn_s32_(transmute(a), transmute(b))); - simd_shuffle8(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Rounding Add returning High Narrow (high half). @@ -1748,7 +1748,7 @@ pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> u #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] pub unsafe fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { let x: uint32x2_t = transmute(vraddhn_s64_(transmute(a), transmute(b))); - simd_shuffle4(r, x, [0, 1, 2, 3]) + simd_shuffle4!(r, x, [0, 1, 2, 3]) } /// Signed Add Long Pairwise. 
@@ -2961,7 +2961,7 @@ pub unsafe fn vget_lane_u8(v: uint8x8_t) -> u8 { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t { - simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } /// Duplicate vector element to vector or scalar @@ -2971,7 +2971,7 @@ pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t { - simd_shuffle4(a, a, [4, 5, 6, 7]) + simd_shuffle4!(a, a, [4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -2981,7 +2981,7 @@ pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t { - simd_shuffle2(a, a, [2, 3]) + simd_shuffle2!(a, a, [2, 3]) } /// Duplicate vector element to vector or scalar @@ -3001,7 +3001,7 @@ pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { - simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } /// Duplicate vector element to vector or scalar @@ -3011,7 +3011,7 @@ pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { - simd_shuffle4(a, a, [4, 5, 6, 7]) + simd_shuffle4!(a, a, [4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -3021,7 +3021,7 @@ pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { - simd_shuffle2(a, a, [2, 3]) + simd_shuffle2!(a, a, [2, 3]) } /// Duplicate vector element to vector or scalar @@ -3041,7 +3041,7 @@ pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { - simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) } /// Duplicate vector element to vector or scalar @@ -3051,7 +3051,7 @@ pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { - simd_shuffle4(a, a, [4, 5, 6, 7]) + simd_shuffle4!(a, a, [4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -3061,7 +3061,7 @@ pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t { - simd_shuffle2(a, a, [2, 3]) + simd_shuffle2!(a, a, 
[2, 3]) } /// Duplicate vector element to vector or scalar @@ -3071,7 +3071,7 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { - simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -3081,7 +3081,7 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Duplicate vector element to vector or scalar @@ -3091,7 +3091,7 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t { - simd_shuffle2(a, a, [0, 1]) + simd_shuffle2!(a, a, [0, 1]) } /// Duplicate vector element to vector or scalar @@ -3111,7 +3111,7 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { - simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -3121,7 +3121,7 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Duplicate vector element to vector or scalar @@ -3131,7 +3131,7 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { - simd_shuffle2(a, a, [0, 1]) + simd_shuffle2!(a, a, [0, 1]) } /// Duplicate vector element to vector or scalar @@ -3151,7 +3151,7 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { - simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Duplicate vector element to vector or scalar @@ -3161,7 +3161,7 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Duplicate vector element to vector or scalar @@ -3171,7 +3171,7 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t { - simd_shuffle2(a, a, [0, 1]) + 
simd_shuffle2!(a, a, [0, 1]) } /// Duplicate vector element to vector or scalar @@ -3713,7 +3713,7 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3723,7 +3723,7 @@ pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t { - simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } /// Reversing vector elements (swap endianness) @@ -3733,7 +3733,7 @@ pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3743,7 +3743,7 @@ pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } /// Reversing vector elements (swap endianness) @@ -3753,7 +3753,7 @@ pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3763,7 +3763,7 @@ pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) } /// Reversing vector elements (swap endianness) @@ -3773,7 +3773,7 @@ pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Reversing vector elements (swap endianness) @@ -3783,7 +3783,7 @@ pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t { - simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) + simd_shuffle16!(a, a, [3, 2, 1, 0, 
7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } /// Reversing vector elements (swap endianness) @@ -3793,7 +3793,7 @@ pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Reversing vector elements (swap endianness) @@ -3803,7 +3803,7 @@ pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) + simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } /// Reversing vector elements (swap endianness) @@ -3813,7 +3813,7 @@ pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -3823,7 +3823,7 @@ pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3833,7 +3833,7 @@ pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -3843,7 +3843,7 @@ pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3853,7 +3853,7 @@ pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -3863,7 +3863,7 @@ pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) } /// Reversing vector elements (swap endianness) @@ -3873,7 +3873,7 @@ pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { #[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Reversing vector elements (swap endianness) @@ -3883,7 +3883,7 @@ pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) + simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) } /// Reversing vector elements (swap endianness) @@ -3893,7 +3893,7 @@ pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t { - simd_shuffle8(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -3903,7 +3903,7 @@ pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t { - simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } /// Reversing vector elements (swap endianness) @@ -3913,7 +3913,7 @@ pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t { - simd_shuffle4(a, a, [3, 2, 1, 0]) + simd_shuffle4!(a, a, [3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -3923,7 +3923,7 @@ pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Reversing vector elements (swap endianness) @@ -3933,7 +3933,7 @@ pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t { - simd_shuffle2(a, a, [1, 0]) + simd_shuffle2!(a, a, [1, 0]) } /// Reversing vector elements (swap endianness) @@ -3943,7 +3943,7 @@ pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -3953,7 +3953,7 @@ pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, a, [7, 
6, 5, 4, 3, 2, 1, 0]) + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -3963,7 +3963,7 @@ pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } /// Reversing vector elements (swap endianness) @@ -3973,7 +3973,7 @@ pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, a, [3, 2, 1, 0]) + simd_shuffle4!(a, a, [3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -3983,7 +3983,7 @@ pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Reversing vector elements (swap endianness) @@ -3993,7 +3993,7 @@ pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, a, [1, 0]) + simd_shuffle2!(a, a, [1, 0]) } /// Reversing vector elements (swap endianness) @@ -4003,7 +4003,7 @@ pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -4013,7 +4013,7 @@ pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t { - simd_shuffle2(a, a, [1, 0]) + simd_shuffle2!(a, a, [1, 0]) } /// Reversing vector elements (swap endianness) @@ -4023,7 +4023,7 @@ pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t { - simd_shuffle4(a, a, [1, 0, 3, 2]) + simd_shuffle4!(a, a, [1, 0, 3, 2]) } /// Reversing vector elements (swap endianness) @@ -4033,7 +4033,7 @@ pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -4043,7 +4043,7 @@ pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vrev64.8"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) } /// Reversing vector elements (swap endianness) @@ -4053,7 +4053,7 @@ pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, a, [3, 2, 1, 0]) + simd_shuffle4!(a, a, [3, 2, 1, 0]) } /// Reversing vector elements (swap endianness) @@ -4063,7 +4063,7 @@ pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } /// Signed Add and Accumulate Long Pairwise. diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index b8cda93d5a..ce59a0ab8b 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -92,3 +92,123 @@ macro_rules! types { pub struct $name($($fields)*); )*) } + +#[allow(unused_macros)] +macro_rules! simd_shuffle2 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 2] = $idx; + simd_shuffle2($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle2_param { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 2] = $idx; + } + + simd_shuffle2($x, $y, ConstParam::<$imm>::IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle4 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 4] = $idx; + simd_shuffle4($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle4_param { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 4] = $idx; + } + + simd_shuffle4($x, $y, ConstParam::<$imm>::IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle8 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 8] = $idx; + simd_shuffle8($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle8_param { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 8] = $idx; + } + + simd_shuffle8($x, $y, ConstParam::<$imm>::IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle16 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 16] = $idx; + simd_shuffle16($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle16_param { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 16] = $idx; + } + + simd_shuffle16($x, $y, ConstParam::<$imm>::IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle32 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 32] = $idx; + simd_shuffle32($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle32_param { + ($x:expr, $y:expr, $idx:expr $(,)?) 
=> {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 32] = $idx; + } + + simd_shuffle32($x, $y, ConstParam::<$imm>::IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle64 { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + const IDX: [u32; 64] = $idx; + simd_shuffle64($x, $y, IDX) + }}; +} + +#[allow(unused_macros)] +macro_rules! simd_shuffle64_param { + ($x:expr, $y:expr, $idx:expr $(,)?) => {{ + struct ConstParam; + impl ConstParam<$imm> { + const IDX: [u32; 64] = $idx; + } + + simd_shuffle64($x, $y, ConstParam::<$imm>::IDX) + }}; +} diff --git a/crates/core_arch/src/powerpc/vsx.rs b/crates/core_arch/src/powerpc/vsx.rs index 19335af198..54e067eb03 100644 --- a/crates/core_arch/src/powerpc/vsx.rs +++ b/crates/core_arch/src/powerpc/vsx.rs @@ -47,10 +47,10 @@ mod sealed { #[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))] unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 { match dm & 0b11 { - 0 => simd_shuffle2(a, b, [0b00, 0b10]), - 1 => simd_shuffle2(a, b, [0b01, 0b10]), - 2 => simd_shuffle2(a, b, [0b00, 0b11]), - _ => simd_shuffle2(a, b, [0b01, 0b11]), + 0 => simd_shuffle2!(a, b, [0b00, 0b10]), + 1 => simd_shuffle2!(a, b, [0b01, 0b10]), + 2 => simd_shuffle2!(a, b, [0b00, 0b11]), + _ => simd_shuffle2!(a, b, [0b01, 0b11]), } } diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 79a1105d56..320073b016 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -118,10 +118,10 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_imm8!(MASK); - simd_shuffle4( + simd_shuffle4_param!( a, b, - [ + [ MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 4, ((MASK as u32 >> 2) & 0b1) + 2, @@ -141,10 +141,10 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m2 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { static_assert_imm8!(MASK); - simd_shuffle8( + simd_shuffle8_param!( a, b, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11) + 8, @@ -463,10 +463,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_imm4!(IMM4); - simd_shuffle4( + simd_shuffle4_param!( a, b, - [ + [ ((IMM4 as u32 >> 0) & 1) * 4 + 0, ((IMM4 as u32 >> 1) & 1) * 4 + 1, ((IMM4 as u32 >> 2) & 1) * 4 + 2, @@ -486,10 +486,10 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { static_assert_imm8!(IMM8); - simd_shuffle8( + simd_shuffle8_param!( a, b, - [ + [ ((IMM8 as u32 >> 0) & 1) * 8 + 0, ((IMM8 as u32 >> 1) & 1) * 8 + 1, ((IMM8 as u32 >> 2) & 1) * 8 + 2, @@ -930,10 +930,10 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { static_assert_imm1!(IMM1); - simd_shuffle4( + simd_shuffle4_param!( a, _mm256_undefined_ps(), - [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], + [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], ) } @@ -951,7 +951,7 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> 
__m128d { static_assert_imm1!(IMM1); - simd_shuffle2(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) + simd_shuffle2_param!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) } /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. @@ -967,10 +967,10 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_si256(a: __m256i) -> __m128i { static_assert_imm1!(IMM1); - let dst: i64x2 = simd_shuffle2( + let dst: i64x2 = simd_shuffle2_param!( a.as_i64x4(), _mm256_undefined_si256().as_i64x4(), - [[0, 1], [2, 3]][IMM1 as usize], + [[0, 1], [2, 3]][IMM1 as usize], ); transmute(dst) } @@ -1033,10 +1033,10 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { static_assert_imm8!(IMM8); - simd_shuffle8( + simd_shuffle8_param!( a, _mm256_undefined_ps(), - [ + [ (IMM8 as u32 >> 0) & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -1060,10 +1060,10 @@ pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_ps(a: __m128) -> __m128 { static_assert_imm8!(IMM8); - simd_shuffle4( + simd_shuffle4_param!( a, _mm_undefined_ps(), - [ + [ (IMM8 as u32 >> 0) & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -1107,10 +1107,10 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_pd(a: __m256d) -> __m256d { static_assert_imm4!(IMM4); - simd_shuffle4( + simd_shuffle4_param!( a, _mm256_undefined_pd(), - [ + [ ((IMM4 as u32 >> 0) & 1), ((IMM4 as u32 >> 1) & 1), ((IMM4 as u32 >> 2) & 1) + 2, @@ -1130,10 +1130,10 @@ pub unsafe fn _mm256_permute_pd(a: __m256d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_pd(a: __m128d) -> __m128d { static_assert_imm2!(IMM2); - simd_shuffle2( + simd_shuffle2_param!( a, _mm_undefined_pd(), - [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], + [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], ) } @@ -1257,10 +1257,10 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m256 { static_assert_imm1!(IMM1); - simd_shuffle8( + simd_shuffle8_param!( a, _mm256_castps128_ps256(b), - [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], + [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], ) } @@ -1279,10 +1279,10 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> __m256d { static_assert_imm1!(IMM1); - simd_shuffle4( + simd_shuffle4_param!( a, _mm256_castpd128_pd256(b), - [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], ) } @@ -1300,10 +1300,10 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> _ #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i) -> __m256i { static_assert_imm1!(IMM1); - let dst: i64x4 = simd_shuffle4( + let dst: i64x4 = simd_shuffle4_param!( a.as_i64x4(), _mm256_castsi128_si256(b).as_i64x4(), - [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], ); transmute(dst) 
} @@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { #[cfg_attr(test, assert_instr(vmovshdup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { - simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) + simd_shuffle8!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements @@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vmovsldup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { - simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) + simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } /// Duplicate even-indexed double-precision (64-bit) floating-point elements @@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { - simd_shuffle4(a, a, [0, 0, 2, 2]) + simd_shuffle4!(a, a, [0, 0, 2, 2]) } /// Loads 256-bits of integer data from unaligned memory into result. @@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vunpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vunpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { - simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } /// Unpacks and interleave double-precision (64-bit) floating-point elements @@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vunpcklpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vunpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { - simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -2572,7 +2572,7 @@ pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Casts vector of type __m256d to type __m128d. @@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { // instructions, thus it has zero latency. 
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { - simd_shuffle2(a, a, [0, 1]) + simd_shuffle2!(a, a, [0, 1]) } /// Casts vector of type __m256i to type __m128i. @@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { let a = a.as_i64x4(); - let dst: i64x2 = simd_shuffle2(a, a, [0, 1]); + let dst: i64x2 = simd_shuffle2!(a, a, [0, 1]); transmute(dst) } @@ -2611,8 +2611,8 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { - // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) - simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) + // FIXME simd_shuffle8!(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) } /// Casts vector of type __m128d to type __m256d; @@ -2625,8 +2625,8 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { - // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) - simd_shuffle4(a, a, [0, 1, 0, 0]) + // FIXME simd_shuffle4!(a, a, [0, 1, -1, -1]) + simd_shuffle4!(a, a, [0, 1, 0, 0]) } /// Casts vector of type __m128i to type __m256i; @@ -2640,8 +2640,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { let a = a.as_i64x2(); - // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) - let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]); + // FIXME simd_shuffle4!(a, a, [0, 1, -1, -1]) + let dst: i64x4 = simd_shuffle4!(a, a, [0, 1, 0, 0]); transmute(dst) } @@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { - simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Constructs a 256-bit integer vector from a 128-bit integer vector. @@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { let b = _mm_setzero_si128().as_i64x2(); - let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]); + let dst: i64x4 = simd_shuffle4!(a.as_i64x2(), b, [0, 1, 2, 3]); transmute(dst) } @@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { - simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) + simd_shuffle4!(a, _mm_setzero_pd(), [0, 1, 2, 3]) } /// Returns vector of type `__m256` with undefined elements. 
@@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i { #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { - simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Sets packed __m256d returned vector with the supplied values. diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index d328632129..108ba0f0bb 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -175,7 +175,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m let b = b.as_i8x32(); let r: i8x32 = match IMM8 % 16 { - 0 => simd_shuffle32( + 0 => simd_shuffle32!( b, a, [ @@ -183,7 +183,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 23, 24, 25, 26, 27, 28, 29, 30, 31, ], ), - 1 => simd_shuffle32( + 1 => simd_shuffle32!( b, a, [ @@ -191,7 +191,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 24, 25, 26, 27, 28, 29, 30, 31, 48, ], ), - 2 => simd_shuffle32( + 2 => simd_shuffle32!( b, a, [ @@ -199,7 +199,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 25, 26, 27, 28, 29, 30, 31, 48, 49, ], ), - 3 => simd_shuffle32( + 3 => simd_shuffle32!( b, a, [ @@ -207,7 +207,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, ], ), - 4 => simd_shuffle32( + 4 => simd_shuffle32!( b, a, [ @@ -215,7 +215,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, ], ), - 5 => simd_shuffle32( + 5 => simd_shuffle32!( b, a, [ @@ -223,7 +223,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, ], ), - 6 => simd_shuffle32( + 6 => simd_shuffle32!( b, a, [ @@ -231,7 +231,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, ], ), - 7 => simd_shuffle32( + 7 => simd_shuffle32!( b, a, [ @@ -239,7 +239,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, ], ), - 8 => simd_shuffle32( + 8 => simd_shuffle32!( b, a, [ @@ -247,7 +247,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, ], ), - 9 => simd_shuffle32( + 9 => simd_shuffle32!( b, a, [ @@ -255,7 +255,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, ], ), - 10 => simd_shuffle32( + 10 => simd_shuffle32!( b, a, [ @@ -263,7 +263,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, ], ), - 11 => simd_shuffle32( + 11 => simd_shuffle32!( b, a, [ @@ -271,7 +271,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, ], ), - 12 => simd_shuffle32( + 12 => simd_shuffle32!( b, a, [ @@ -279,7 +279,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, ], ), - 13 => simd_shuffle32( + 13 => simd_shuffle32!( b, a, [ @@ -287,7 +287,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, ], ), - 14 => simd_shuffle32( + 14 => simd_shuffle32!( b, a, [ @@ -295,7 +295,7 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, ], ), - 15 => 
simd_shuffle32( + 15 => simd_shuffle32!( b, a, [ @@ -370,10 +370,10 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128 static_assert_imm4!(IMM4); let a = a.as_i32x4(); let b = b.as_i32x4(); - let r: i32x4 = simd_shuffle4( + let r: i32x4 = simd_shuffle4_param!( a, b, - [ + [ [0, 4, 0, 4][IMM4 as usize & 0b11], [1, 1, 5, 5][IMM4 as usize & 0b11], [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11], @@ -395,10 +395,10 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m static_assert_imm8!(IMM8); let a = a.as_i32x8(); let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle8( + let r: i32x8 = simd_shuffle8_param!( a, b, - [ + [ [0, 8, 0, 8][IMM8 as usize & 0b11], [1, 1, 9, 9][IMM8 as usize & 0b11], [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11], @@ -424,10 +424,11 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m static_assert_imm8!(IMM8); let a = a.as_i16x16(); let b = b.as_i16x16(); - let r: i16x16 = simd_shuffle16( + + let r: i16x16 = simd_shuffle16_param!( a, b, - [ + [ [0, 16, 0, 16][IMM8 as usize & 0b11], [1, 1, 17, 17][IMM8 as usize & 0b11], [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11], @@ -470,7 +471,7 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); + let ret = simd_shuffle16!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); transmute::(ret) } @@ -484,7 +485,7 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); + let ret = simd_shuffle32!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); transmute::(ret) } @@ -500,7 +501,7 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); + let ret = simd_shuffle4!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); transmute::(ret) } @@ -516,7 +517,7 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); + let ret = simd_shuffle8!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); transmute::(ret) } @@ -530,7 +531,7 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { - let ret = simd_shuffle2(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); + let ret = simd_shuffle2!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); transmute::(ret) } @@ -543,7 +544,7 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { - let ret = simd_shuffle4(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); + let ret = simd_shuffle4!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); transmute::(ret) } @@ -556,7 +557,7 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> 
__m256i { #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { - simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2]) + simd_shuffle2!(a, _mm_setzero_pd(), [0_u32; 2]) } /// Broadcasts the low double-precision (64-bit) floating-point element @@ -568,7 +569,7 @@ pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { - simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4]) + simd_shuffle4!(a, _mm_setzero_pd(), [0_u32; 4]) } // N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or @@ -582,7 +583,7 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); + let ret = simd_shuffle4!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); transmute::(ret) } @@ -595,7 +596,7 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { - simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4]) + simd_shuffle4!(a, _mm_setzero_ps(), [0_u32; 4]) } /// Broadcasts the low single-precision (32-bit) floating-point element @@ -607,7 +608,7 @@ pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { - simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8]) + simd_shuffle8!(a, _mm_setzero_ps(), [0_u32; 8]) } /// Broadcasts the low packed 16-bit integer from a to all elements of @@ -620,7 +621,7 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); + let ret = simd_shuffle8!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); transmute::(ret) } @@ -634,7 +635,7 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); - let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); + let ret = simd_shuffle16!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); transmute::(ret) } @@ -746,7 +747,7 @@ pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { let a = a.as_i16x8(); - let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); + let v64: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v64)) } @@ -781,7 +782,7 @@ pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { let a = a.as_i8x16(); - let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let v64: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v64)) } @@ -794,7 +795,7 @@ pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", 
since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { let a = a.as_i8x16(); - let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); + let v32: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v32)) } @@ -820,7 +821,7 @@ pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { let a = a.as_u16x8(); - let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); + let v64: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v64)) } @@ -856,7 +857,7 @@ pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { let a = a.as_u8x16(); - let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let v64: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v64)) } @@ -870,7 +871,7 @@ pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { let a = a.as_u8x16(); - let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); + let v32: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v32)) } @@ -889,7 +890,7 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i) -> __m128i { static_assert_imm1!(IMM1); let a = a.as_i64x4(); let b = _mm256_undefined_si256().as_i64x4(); - let dst: i64x2 = simd_shuffle2(a, b, [[0, 1], [2, 3]][IMM1 as usize]); + let dst: i64x2 = simd_shuffle2_param!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); transmute(dst) } @@ -1711,7 +1712,7 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i) - static_assert_imm1!(IMM1); let a = a.as_i64x4(); let b = _mm256_castsi128_si256(b).as_i64x4(); - let dst: i64x4 = simd_shuffle4(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); + let dst: i64x4 = simd_shuffle4_param!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); transmute(dst) } @@ -2200,10 +2201,10 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { pub unsafe fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - let r: i64x4 = simd_shuffle4( + let r: i64x4 = simd_shuffle4_param!( a.as_i64x4(), zero, - [ + [ IMM8 as u32 & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -2237,10 +2238,10 @@ pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { static_assert_imm8!(IMM8); - simd_shuffle4( + simd_shuffle4_param!( a, _mm256_undefined_pd(), - [ + [ IMM8 as u32 & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -2350,10 +2351,10 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { static_assert_imm8!(MASK); - let r: i32x8 = simd_shuffle8( + let r: i32x8 = simd_shuffle8_param!( a.as_i32x8(), a.as_i32x8(), - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, (MASK as u32 >> 4) & 0b11, @@ -2380,10 +2381,10 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle16( + let r: i16x16 = simd_shuffle16_param!( a, a, - [ + [ 0, 1, 2, @@ -2418,10 +2419,10 @@ 
pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { pub unsafe fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle16( + let r: i16x16 = simd_shuffle16_param!( a, a, - [ + [ 0 + (IMM8 as u32 & 0b11), 0 + ((IMM8 as u32 >> 2) & 0b11), 0 + ((IMM8 as u32 >> 4) & 0b11), @@ -2585,10 +2586,10 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i8x32(); let zero = _mm256_setzero_si256().as_i8x32(); - let r: i8x32 = simd_shuffle32( + let r: i8x32 = simd_shuffle32_param!( zero, a, - [ + [ 32 - (IMM8 as u32 & 0xff), 33 - (IMM8 as u32 & 0xff), 34 - (IMM8 as u32 & 0xff), @@ -2780,7 +2781,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { let a = a.as_i8x32(); let zero = _mm256_setzero_si256().as_i8x32(); let r: i8x32 = match IMM8 % 16 { - 0 => simd_shuffle32( + 0 => simd_shuffle32!( a, zero, [ @@ -2788,7 +2789,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 23, 24, 25, 26, 27, 28, 29, 30, 31, ], ), - 1 => simd_shuffle32( + 1 => simd_shuffle32!( a, zero, [ @@ -2796,7 +2797,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 24, 25, 26, 27, 28, 29, 30, 31, 32, ], ), - 2 => simd_shuffle32( + 2 => simd_shuffle32!( a, zero, [ @@ -2804,7 +2805,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 25, 26, 27, 28, 29, 30, 31, 32, 32, ], ), - 3 => simd_shuffle32( + 3 => simd_shuffle32!( a, zero, [ @@ -2812,7 +2813,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, ], ), - 4 => simd_shuffle32( + 4 => simd_shuffle32!( a, zero, [ @@ -2820,7 +2821,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, ], ), - 5 => simd_shuffle32( + 5 => simd_shuffle32!( a, zero, [ @@ -2828,7 +2829,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, ], ), - 6 => simd_shuffle32( + 6 => simd_shuffle32!( a, zero, [ @@ -2836,7 +2837,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, ], ), - 7 => simd_shuffle32( + 7 => simd_shuffle32!( a, zero, [ @@ -2844,7 +2845,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, ], ), - 8 => simd_shuffle32( + 8 => simd_shuffle32!( a, zero, [ @@ -2852,7 +2853,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 9 => simd_shuffle32( + 9 => simd_shuffle32!( a, zero, [ @@ -2860,7 +2861,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 10 => simd_shuffle32( + 10 => simd_shuffle32!( a, zero, [ @@ -2868,7 +2869,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 11 => simd_shuffle32( + 11 => simd_shuffle32!( a, zero, [ @@ -2876,7 +2877,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 12 => simd_shuffle32( + 12 => simd_shuffle32!( a, zero, [ @@ -2884,7 +2885,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 13 => simd_shuffle32( + 13 => simd_shuffle32!( a, zero, [ @@ -2892,7 +2893,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 14 => simd_shuffle32( + 14 => simd_shuffle32!( a, 
zero, [ @@ -2900,7 +2901,7 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, ], ), - 15 => simd_shuffle32( + 15 => simd_shuffle32!( a, zero, [ @@ -3178,7 +3179,7 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { #[rustfmt::skip] - let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ + let r: i8x32 = simd_shuffle32!(a.as_i8x32(), b.as_i8x32(), [ 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 24, 56, 25, 57, 26, 58, 27, 59, @@ -3231,7 +3232,7 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { #[rustfmt::skip] - let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ + let r: i8x32 = simd_shuffle32!(a.as_i8x32(), b.as_i8x32(), [ 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 16, 48, 17, 49, 18, 50, 19, 51, @@ -3279,7 +3280,7 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpunpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { - let r: i16x16 = simd_shuffle16( + let r: i16x16 = simd_shuffle16!( a.as_i16x16(), b.as_i16x16(), [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], @@ -3327,7 +3328,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpunpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { - let r: i16x16 = simd_shuffle16( + let r: i16x16 = simd_shuffle16!( a.as_i16x16(), b.as_i16x16(), [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], @@ -3368,7 +3369,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vunpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); + let r: i32x8 = simd_shuffle8!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); transmute(r) } @@ -3405,7 +3406,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vunpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); + let r: i32x8 = simd_shuffle8!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); transmute(r) } @@ -3442,7 +3443,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vunpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { - let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); + let r: i64x4 = simd_shuffle4!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); transmute(r) } @@ -3479,7 +3480,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vunpcklpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { - let r: i64x4 = 
simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); + let r: i64x4 = simd_shuffle4!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); transmute(r) } diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 8b40413fba..a66d1c2934 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -6218,7 +6218,7 @@ pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { let a = _mm512_castsi128_si512(a).as_i16x32(); - let ret: i16x32 = simd_shuffle32( + let ret: i16x32 = simd_shuffle32!( a, a, [ @@ -6306,7 +6306,7 @@ pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpbroadcastb))] pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { let a = _mm512_castsi128_si512(a).as_i8x64(); - let ret: i8x64 = simd_shuffle64( + let ret: i8x64 = simd_shuffle64!( a, a, [ @@ -6397,7 +6397,7 @@ pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i16x32(); let b = b.as_i16x32(); #[rustfmt::skip] - let r: i16x32 = simd_shuffle32( + let r: i16x32 = simd_shuffle32!( a, b, [ @@ -6508,7 +6508,7 @@ pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i8x64(); let b = b.as_i8x64(); #[rustfmt::skip] - let r: i8x64 = simd_shuffle64( + let r: i8x64 = simd_shuffle64!( a, b, [ @@ -6627,7 +6627,7 @@ pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i16x32(); let b = b.as_i16x32(); #[rustfmt::skip] - let r: i16x32 = simd_shuffle32( + let r: i16x32 = simd_shuffle32!( a, b, [ @@ -6738,7 +6738,7 @@ pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i8x64(); let b = b.as_i8x64(); #[rustfmt::skip] - let r: i8x64 = simd_shuffle64( + let r: i8x64 = simd_shuffle64!( a, b, [ @@ -7133,10 +7133,10 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { pub unsafe fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle32( + let r: i16x32 = simd_shuffle32_param!( a, a, - [ + [ IMM8 as u32 & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -7277,10 +7277,10 @@ pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i pub unsafe fn _mm512_shufflehi_epi16(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle32( + let r: i16x32 = simd_shuffle32_param!( a, a, - [ + [ 0, 1, 2, @@ -8433,7 +8433,7 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { let a = a.as_i16x8(); let zero = _mm_setzero_si128().as_i16x8(); - let v256: i16x16 = simd_shuffle16(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]); + let v256: i16x16 = simd_shuffle16!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]); transmute::(simd_cast(v256)) } @@ -8875,10 +8875,10 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i8x64(); let zero = _mm512_setzero_si512().as_i8x64(); - let r: i8x64 = simd_shuffle64( + let r: i8x64 = simd_shuffle64_param!( zero, a, - [ + [ 64 - (IMM8 as u32 & 0xff), 65 - (IMM8 as u32 & 0xff), 66 - (IMM8 as u32 & 0xff), @@ -8960,7 +8960,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { let a = a.as_i8x64(); let zero = 
_mm512_setzero_si512().as_i8x64(); let r: i8x64 = match IMM8 % 16 { - 0 => simd_shuffle64( + 0 => simd_shuffle64!( a, zero, [ @@ -8969,7 +8969,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ], ), - 1 => simd_shuffle64( + 1 => simd_shuffle64!( a, zero, [ @@ -8978,7 +8978,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, ], ), - 2 => simd_shuffle64( + 2 => simd_shuffle64!( a, zero, [ @@ -8987,7 +8987,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, ], ), - 3 => simd_shuffle64( + 3 => simd_shuffle64!( a, zero, [ @@ -8997,7 +8997,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 114, ], ), - 4 => simd_shuffle64( + 4 => simd_shuffle64!( a, zero, [ @@ -9007,7 +9007,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 115, ], ), - 5 => simd_shuffle64( + 5 => simd_shuffle64!( a, zero, [ @@ -9017,7 +9017,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 115, 116, ], ), - 6 => simd_shuffle64( + 6 => simd_shuffle64!( a, zero, [ @@ -9027,7 +9027,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 116, 117, ], ), - 7 => simd_shuffle64( + 7 => simd_shuffle64!( a, zero, [ @@ -9037,7 +9037,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 116, 117, 118, ], ), - 8 => simd_shuffle64( + 8 => simd_shuffle64!( a, zero, [ @@ -9047,7 +9047,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 116, 117, 118, 119, ], ), - 9 => simd_shuffle64( + 9 => simd_shuffle64!( a, zero, [ @@ -9057,7 +9057,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 117, 118, 119, 120, ], ), - 10 => simd_shuffle64( + 10 => simd_shuffle64!( a, zero, [ @@ -9067,7 +9067,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 118, 119, 120, 121, ], ), - 11 => simd_shuffle64( + 11 => simd_shuffle64!( a, zero, [ @@ -9077,7 +9077,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 117, 118, 119, 120, 121, 122, ], ), - 12 => simd_shuffle64( + 12 => simd_shuffle64!( a, zero, [ @@ -9087,7 +9087,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 118, 119, 120, 121, 122, 123, ], ), - 13 => simd_shuffle64( + 13 => simd_shuffle64!( a, zero, [ @@ -9097,7 +9097,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 119, 120, 121, 122, 123, 124, ], ), - 14 => simd_shuffle64( + 14 => simd_shuffle64!( a, zero, [ @@ -9107,7 +9107,7 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { 120, 121, 122, 123, 124, 125, ], ), - 15 => simd_shuffle64( + 15 => simd_shuffle64!( a, zero, [ @@ -9146,7 +9146,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m let b = b.as_i8x64(); let r: i8x64 = match IMM8 % 16 { - 0 => simd_shuffle64( + 0 => simd_shuffle64!( b, a, [ @@ -9155,7 +9155,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ], ), - 1 => simd_shuffle64( + 1 => simd_shuffle64!( b, a, [ @@ -9164,7 +9164,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, ], ), - 2 => simd_shuffle64( + 2 => simd_shuffle64!( b, a, [ @@ -9173,7 +9173,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 46, 47, 96, 97, 50, 51, 52, 53, 54, 
55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, ], ), - 3 => simd_shuffle64( + 3 => simd_shuffle64!( b, a, [ @@ -9183,7 +9183,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 114, ], ), - 4 => simd_shuffle64( + 4 => simd_shuffle64!( b, a, [ @@ -9193,7 +9193,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 115, ], ), - 5 => simd_shuffle64( + 5 => simd_shuffle64!( b, a, [ @@ -9203,7 +9203,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 115, 116, ], ), - 6 => simd_shuffle64( + 6 => simd_shuffle64!( b, a, [ @@ -9213,7 +9213,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 116, 117, ], ), - 7 => simd_shuffle64( + 7 => simd_shuffle64!( b, a, [ @@ -9223,7 +9223,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 116, 117, 118, ], ), - 8 => simd_shuffle64( + 8 => simd_shuffle64!( b, a, [ @@ -9233,7 +9233,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 116, 117, 118, 119, ], ), - 9 => simd_shuffle64( + 9 => simd_shuffle64!( b, a, [ @@ -9243,7 +9243,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 117, 118, 119, 120, ], ), - 10 => simd_shuffle64( + 10 => simd_shuffle64!( b, a, [ @@ -9253,7 +9253,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 118, 119, 120, 121, ], ), - 11 => simd_shuffle64( + 11 => simd_shuffle64!( b, a, [ @@ -9263,7 +9263,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 117, 118, 119, 120, 121, 122, ], ), - 12 => simd_shuffle64( + 12 => simd_shuffle64!( b, a, [ @@ -9273,7 +9273,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 118, 119, 120, 121, 122, 123, ], ), - 13 => simd_shuffle64( + 13 => simd_shuffle64!( b, a, [ @@ -9283,7 +9283,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 119, 120, 121, 122, 123, 124, ], ), - 14 => simd_shuffle64( + 14 => simd_shuffle64!( b, a, [ @@ -9293,7 +9293,7 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m 120, 121, 122, 123, 124, 125, ], ), - 15 => simd_shuffle64( + 15 => simd_shuffle64!( b, a, [ diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 1b4ec89a1d..18c6b296d7 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -10529,7 +10529,7 @@ pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { 0b11111111, _MM_FROUND_CUR_DIRECTION, ); - simd_shuffle16( + simd_shuffle16!( r, _mm256_setzero_ps().as_f32x8(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], @@ -10549,7 +10549,7 @@ pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> _ k, _MM_FROUND_CUR_DIRECTION, ); - simd_shuffle16( + simd_shuffle16!( r, _mm256_setzero_ps().as_f32x8(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], @@ -10644,7 +10644,7 @@ pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpmovsxbq))] pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i { let a = a.as_i8x16(); - let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let v64: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v64)) } @@ -10805,7 +10805,7 @@ pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpmovzxbq))] pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i { let a = a.as_u8x16(); - let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let v64: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 
7]); transmute::(simd_cast(v64)) } @@ -11628,7 +11628,7 @@ pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { #[cfg_attr(test, assert_instr(vcvtudq2pd))] pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d { let a = a.as_u32x4(); - let u64: u32x2 = simd_shuffle2(a, a, [0, 1]); + let u64: u32x2 = simd_shuffle2!(a, a, [0, 1]); transmute::(simd_cast(u64)) } @@ -11663,7 +11663,7 @@ pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { #[cfg_attr(test, assert_instr(vcvtdq2pd))] pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d { let v2 = v2.as_i32x16(); - let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); + let v256: i32x8 = simd_shuffle8!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v256)) } @@ -11686,7 +11686,7 @@ pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) #[cfg_attr(test, assert_instr(vcvtudq2pd))] pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d { let v2 = v2.as_u32x16(); - let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); + let v256: u32x8 = simd_shuffle8!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v256)) } @@ -19215,10 +19215,10 @@ pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _ #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permute_ps(a: __m512) -> __m512 { static_assert_imm8!(MASK); - simd_shuffle16( + simd_shuffle16_param!( a, a, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11), @@ -19333,10 +19333,10 @@ pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128) -> _ #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permute_pd(a: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8( + simd_shuffle8_param!( a, a, - [ + [ MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1), ((MASK as u32 >> 2) & 0b1) + 2, @@ -19451,10 +19451,10 @@ pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d) -> #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permutex_epi64(a: __m512i) -> __m512i { static_assert_imm8!(MASK); - simd_shuffle8( + simd_shuffle8_param!( a, a, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11), @@ -19507,10 +19507,10 @@ pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m51 #[rustc_legacy_const_generics(1)] pub unsafe fn _mm256_permutex_epi64(a: __m256i) -> __m256i { static_assert_imm8!(MASK); - simd_shuffle4( + simd_shuffle4_param!( a, a, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11), @@ -19559,10 +19559,10 @@ pub unsafe fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m25 #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permutex_pd(a: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8( + simd_shuffle8_param!( a, a, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11), @@ -19613,10 +19613,10 @@ pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d) #[rustc_legacy_const_generics(1)] pub unsafe fn _mm256_permutex_pd(a: __m256d) -> __m256d { static_assert_imm8!(MASK); - simd_shuffle4( + simd_shuffle4_param!( a, a, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11), @@ -20867,10 +20867,10 @@ pub unsafe fn _mm_mask2_permutex2var_pd( #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { static_assert_imm8!(MASK); - let r: i32x16 = simd_shuffle16( + let r: i32x16 = simd_shuffle16_param!( a.as_i32x16(), a.as_i32x16(), - [ + [ MASK 
as u32 & 0b11, (MASK as u32 >> 2) & 0b11, (MASK as u32 >> 4) & 0b11, @@ -21003,10 +21003,10 @@ pub unsafe fn _mm_maskz_shuffle_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 { static_assert_imm8!(MASK); - simd_shuffle16( + simd_shuffle16_param!( a, b, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11) + 16, @@ -21140,10 +21140,10 @@ pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: _ #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8( + simd_shuffle8_param!( a, b, - [ + [ MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 8, ((MASK as u32 >> 2) & 0b1) + 2, @@ -21275,10 +21275,10 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> _ static_assert_imm8!(MASK); let a = a.as_i32x16(); let b = b.as_i32x16(); - let r: i32x16 = simd_shuffle16( + let r: i32x16 = simd_shuffle16_param!( a, b, - [ + [ (MASK as u32 & 0b11) * 4 + 0, (MASK as u32 & 0b11) * 4 + 1, (MASK as u32 & 0b11) * 4 + 2, @@ -21347,10 +21347,10 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> _ static_assert_imm8!(MASK); let a = a.as_i32x8(); let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle8( + let r: i32x8 = simd_shuffle8_param!( a, b, - [ + [ (MASK as u32 & 0b1) * 4 + 0, (MASK as u32 & 0b1) * 4 + 1, (MASK as u32 & 0b1) * 4 + 2, @@ -21411,10 +21411,10 @@ pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> _ static_assert_imm8!(MASK); let a = a.as_i64x8(); let b = b.as_i64x8(); - let r: i64x8 = simd_shuffle8( + let r: i64x8 = simd_shuffle8_param!( a, b, - [ + [ (MASK as u32 & 0b11) * 2 + 0, (MASK as u32 & 0b11) * 2 + 1, ((MASK as u32 >> 2) & 0b11) * 2 + 0, @@ -21475,10 +21475,10 @@ pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> _ static_assert_imm8!(MASK); let a = a.as_i64x4(); let b = b.as_i64x4(); - let r: i64x4 = simd_shuffle4( + let r: i64x4 = simd_shuffle4_param!( a, b, - [ + [ (MASK as u32 & 0b1) * 2 + 0, (MASK as u32 & 0b1) * 2 + 1, ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, @@ -21535,10 +21535,10 @@ pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m static_assert_imm8!(MASK); let a = a.as_f32x16(); let b = b.as_f32x16(); - let r: f32x16 = simd_shuffle16( + let r: f32x16 = simd_shuffle16_param!( a, b, - [ + [ (MASK as u32 & 0b11) * 4 + 0, (MASK as u32 & 0b11) * 4 + 1, (MASK as u32 & 0b11) * 4 + 2, @@ -21607,10 +21607,10 @@ pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m static_assert_imm8!(MASK); let a = a.as_f32x8(); let b = b.as_f32x8(); - let r: f32x8 = simd_shuffle8( + let r: f32x8 = simd_shuffle8_param!( a, b, - [ + [ (MASK as u32 & 0b1) * 4 + 0, (MASK as u32 & 0b1) * 4 + 1, (MASK as u32 & 0b1) * 4 + 2, @@ -21671,10 +21671,10 @@ pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> _ static_assert_imm8!(MASK); let a = a.as_f64x8(); let b = b.as_f64x8(); - let r: f64x8 = simd_shuffle8( + let r: f64x8 = simd_shuffle8_param!( a, b, - [ + [ (MASK as u32 & 0b11) * 2 + 0, (MASK as u32 & 0b11) * 2 + 1, ((MASK as u32 >> 2) & 0b11) * 2 + 0, @@ -21735,10 +21735,10 @@ pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> _ static_assert_imm8!(MASK); let a = a.as_f64x4(); let b = b.as_f64x4(); - let r: f64x4 = simd_shuffle4( + let r: f64x4 = simd_shuffle4_param!( a, b, - [ + [ (MASK as u32 & 0b1) * 2 + 0, (MASK as u32 & 0b1) * 2 + 1, ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, @@ -21797,10 +21797,10 @@ pub unsafe fn _mm256_maskz_shuffle_f64x2( pub 
unsafe fn _mm512_extractf32x4_ps(a: __m512) -> __m128 { static_assert_imm2!(IMM8); match IMM8 & 0x3 { - 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]), - 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]), - 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]), - _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]), + 0 => simd_shuffle4!(a, _mm512_undefined_ps(), [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, _mm512_undefined_ps(), [4, 5, 6, 7]), + 2 => simd_shuffle4!(a, _mm512_undefined_ps(), [8, 9, 10, 11]), + _ => simd_shuffle4!(a, _mm512_undefined_ps(), [12, 13, 14, 15]), } } @@ -21854,8 +21854,8 @@ pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m5 pub unsafe fn _mm256_extractf32x4_ps(a: __m256) -> __m128 { static_assert_imm1!(IMM8); match IMM8 & 0x1 { - 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), - _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]), + 0 => simd_shuffle4!(a, _mm256_undefined_ps(), [0, 1, 2, 3]), + _ => simd_shuffle4!(a, _mm256_undefined_ps(), [4, 5, 6, 7]), } } @@ -21909,8 +21909,8 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m2 pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i { static_assert_imm1!(IMM1); match IMM1 { - 0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]), - _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]), + 0 => simd_shuffle4!(a, _mm512_set1_epi64(0), [0, 1, 2, 3]), + _ => simd_shuffle4!(a, _mm512_set1_epi64(0), [4, 5, 6, 7]), } } @@ -21964,8 +21964,8 @@ pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: _ pub unsafe fn _mm512_extractf64x4_pd(a: __m512d) -> __m256d { static_assert_imm1!(IMM8); match IMM8 & 0x1 { - 0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]), - _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]), + 0 => simd_shuffle4!(a, _mm512_undefined_pd(), [0, 1, 2, 3]), + _ => simd_shuffle4!(a, _mm512_undefined_pd(), [4, 5, 6, 7]), } } @@ -22021,10 +22021,10 @@ pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i let a = a.as_i32x16(); let undefined = _mm512_undefined_epi32().as_i32x16(); let extract: i32x4 = match IMM2 { - 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]), - 2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]), - _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]), + 0 => simd_shuffle4!(a, undefined, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, undefined, [4, 5, 6, 7]), + 2 => simd_shuffle4!(a, undefined, [8, 9, 10, 11]), + _ => simd_shuffle4!(a, undefined, [12, 13, 14, 15]), }; transmute(extract) } @@ -22081,8 +22081,8 @@ pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i let a = a.as_i32x8(); let undefined = _mm256_undefined_si256().as_i32x8(); let extract: i32x4 = match IMM1 { - 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]), - _ => simd_shuffle4(a, undefined, [4, 5, 6, 7]), + 0 => simd_shuffle4!(a, undefined, [0, 1, 2, 3]), + _ => simd_shuffle4!(a, undefined, [4, 5, 6, 7]), }; transmute(extract) } @@ -22131,7 +22131,7 @@ pub unsafe fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 { - let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + let r: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); transmute(r) } @@ -22142,7 +22142,7 @@ pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 { 
#[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + let mov: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); transmute(simd_select_bitmask(k, mov, src.as_f32x16())) } @@ -22153,7 +22153,7 @@ pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + let mov: f32x16 = simd_shuffle16!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, mov, zero)) } @@ -22211,7 +22211,7 @@ pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 { - let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + let r: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); transmute(r) } @@ -22222,7 +22222,7 @@ pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + let mov: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); transmute(simd_select_bitmask(k, mov, src.as_f32x16())) } @@ -22233,7 +22233,7 @@ pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + let mov: f32x16 = simd_shuffle16!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, mov, zero)) } @@ -22291,7 +22291,7 @@ pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d { - let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + let r: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); transmute(r) } @@ -22302,7 +22302,7 @@ pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + let mov: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); transmute(simd_select_bitmask(k, mov, src.as_f64x8())) } @@ -22313,7 +22313,7 @@ pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { - let 
mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + let mov: f64x8 = simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, mov, zero)) } @@ -22376,22 +22376,22 @@ pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i) -> __m let a = a.as_i32x16(); let b = _mm512_castsi128_si512(b).as_i32x16(); let ret: i32x16 = match IMM8 & 0b11 { - 0 => simd_shuffle16( + 0 => simd_shuffle16!( a, b, [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ), - 1 => simd_shuffle16( + 1 => simd_shuffle16!( a, b, [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], ), - 2 => simd_shuffle16( + 2 => simd_shuffle16!( a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], ), - _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), + _ => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), }; transmute(ret) } @@ -22447,8 +22447,8 @@ pub unsafe fn _mm256_inserti32x4(a: __m256i, b: __m128i) -> __m let a = a.as_i32x8(); let b = _mm256_castsi128_si256(b).as_i32x8(); let ret: i32x8 = match IMM8 & 0b1 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), }; transmute(ret) } @@ -22506,8 +22506,8 @@ pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i) -> __m static_assert_imm1!(IMM8); let b = _mm512_castsi256_si512(b); match IMM8 & 0b1 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } } @@ -22558,22 +22558,22 @@ pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128) -> __m51 static_assert_imm2!(IMM8); let b = _mm512_castps128_ps512(b); match IMM8 & 0b11 { - 0 => simd_shuffle16( + 0 => simd_shuffle16!( a, b, [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ), - 1 => simd_shuffle16( + 1 => simd_shuffle16!( a, b, [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], ), - 2 => simd_shuffle16( + 2 => simd_shuffle16!( a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], ), - _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), + _ => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), } } @@ -22627,8 +22627,8 @@ pub unsafe fn _mm256_insertf32x4(a: __m256, b: __m128) -> __m25 static_assert_imm1!(IMM8); let b = _mm256_castps128_ps256(b); match IMM8 & 0b1 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } } @@ -22685,8 +22685,8 @@ pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d) -> __m static_assert_imm1!(IMM8); let b = _mm512_castpd256_pd512(b); match IMM8 & 0b1 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle8!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } } @@ -22736,7 +22736,7 @@ pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i32x16(); let b = b.as_i32x16(); #[rustfmt::skip] - let r: i32x16 = simd_shuffle16( + let r: i32x16 = simd_shuffle16!( a, b, [ 2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 
19 + 4, @@ -22837,7 +22837,7 @@ pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i { - simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) + simd_shuffle8!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22932,7 +22932,7 @@ pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _ #[cfg_attr(test, assert_instr(vunpckhps))] pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 { #[rustfmt::skip] - simd_shuffle16( + simd_shuffle16!( a, b, [ 2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 19 + 4, @@ -23017,7 +23017,7 @@ pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vunpckhpd))] pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d { - simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) + simd_shuffle8!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23109,7 +23109,7 @@ pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i { let a = a.as_i32x16(); let b = b.as_i32x16(); #[rustfmt::skip] - let r: i32x16 = simd_shuffle16( + let r: i32x16 = simd_shuffle16!( a, b, [ 0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4, @@ -23210,7 +23210,7 @@ pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i { - simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) + simd_shuffle8!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
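To make the index arithmetic above easier to follow, here is a minimal sketch (not part of the patch; the helper name is hypothetical) of how the per-128-bit-lane unpack-high indices for 64-bit elements are derived. The low-half variant is analogous with bases 0 and 8.

// Sketch only: derives the index array used by _mm512_unpackhi_epi64 above.
// Each 128-bit lane holds two 64-bit elements; lane `l` takes the high
// element of `a` (index 1 + 2*l) followed by the high element of `b`
// (index 9 + 2*l, i.e. offset by the vector length 8).
fn unpackhi_epi64_indices() -> [u32; 8] {
    let mut idx = [0u32; 8];
    for lane in 0..4u32 {
        idx[(2 * lane) as usize] = 1 + 2 * lane;     // high element of `a` in this lane
        idx[(2 * lane + 1) as usize] = 9 + 2 * lane; // high element of `b` in this lane
    }
    idx // == [1, 9, 3, 11, 5, 13, 7, 15], i.e. [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]
}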
@@ -23305,7 +23305,7 @@ pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _ #[cfg_attr(test, assert_instr(vunpcklps))] pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 { #[rustfmt::skip] - simd_shuffle16(a, b, + simd_shuffle16!(a, b, [ 0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4, 0 + 8, 16 + 8, 1 + 8, 17 + 8, @@ -23389,7 +23389,7 @@ pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vunpcklpd))] pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d { - simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) + simd_shuffle8!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23477,7 +23477,7 @@ pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1 #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { - simd_shuffle16( + simd_shuffle16!( a, _mm_set1_ps(-1.), [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], @@ -23490,7 +23490,7 @@ pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { - simd_shuffle16( + simd_shuffle16!( a, _mm256_set1_ps(-1.), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], @@ -23503,7 +23503,7 @@ pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 { - simd_shuffle16( + simd_shuffle16!( a, _mm_set1_ps(0.), [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], @@ -23516,7 +23516,7 @@ pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 { - simd_shuffle16( + simd_shuffle16!( a, _mm256_set1_ps(0.), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], @@ -23529,7 +23529,7 @@ pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23538,7 +23538,7 @@ pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 { - simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23565,7 +23565,7 @@ pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle8!(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. 
This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23574,7 +23574,7 @@ pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle8!(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23583,7 +23583,7 @@ pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle8!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23592,7 +23592,7 @@ pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle8!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23601,7 +23601,7 @@ pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { - simd_shuffle2(a, a, [0, 1]) + simd_shuffle2!(a, a, [0, 1]) } /// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23610,7 +23610,7 @@ pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23637,7 +23637,7 @@ pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { - simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle8!(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23646,7 +23646,7 @@ pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { - simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle8!(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. 
This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23655,7 +23655,7 @@ pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { - simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle8!(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23664,7 +23664,7 @@ pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { - simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle8!(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23673,7 +23673,7 @@ pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i { - simd_shuffle2(a, a, [0, 1]) + simd_shuffle2!(a, a, [0, 1]) } /// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23682,7 +23682,7 @@ pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i { - simd_shuffle4(a, a, [0, 1, 2, 3]) + simd_shuffle4!(a, a, [0, 1, 2, 3]) } /// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -23722,7 +23722,7 @@ pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i { let a = _mm512_castsi128_si512(a).as_i32x16(); - let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let ret: i32x16 = simd_shuffle16!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); transmute(ret) } @@ -23802,7 +23802,7 @@ pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { - simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
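For context, a small usage sketch of the broadcast intrinsic touched above. It assumes a toolchain where the AVX-512 intrinsics are available and a CPU with avx512f (checked at runtime, e.g. via is_x86_feature_detected!); the demo function name is hypothetical.

// Sketch only: _mm512_broadcastq_epi64 copies the low 64-bit lane of `a`
// into all eight lanes, which is what the all-zero index array above encodes.
#[target_feature(enable = "avx512f")]
unsafe fn broadcastq_demo() {
    use core::arch::x86_64::*;
    let a = _mm_set_epi64x(99, 7); // low 64-bit lane is 7
    let r = _mm512_broadcastq_epi64(a); // every lane becomes 7
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(7)), 0xff);
}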
@@ -23881,7 +23881,7 @@ pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vbroadcastss))] pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 { - simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle16!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23960,7 +23960,7 @@ pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vbroadcastsd))] pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d { - simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) + simd_shuffle8!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24016,7 +24016,7 @@ pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i { let a = a.as_i32x4(); - let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + let ret: i32x16 = simd_shuffle16!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]); transmute(ret) } @@ -24048,7 +24048,7 @@ pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i { let a = a.as_i32x4(); - let ret: i32x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]); + let ret: i32x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]); transmute(ret) } @@ -24079,7 +24079,7 @@ pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i { - simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } /// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24109,7 +24109,7 @@ pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 { - simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) + simd_shuffle16!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
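Similarly, a hedged sketch of what the [0, 1, 2, 3, 0, 1, 2, 3, ...] pattern above means for _mm512_broadcast_f32x4: the four f32 lanes of the source are repeated into each 128-bit quarter of the result. Same assumptions as the previous sketch; the demo name is hypothetical.

// Sketch only: broadcast a 128-bit group of four floats across a 512-bit vector.
#[target_feature(enable = "avx512f")]
unsafe fn broadcast_f32x4_demo() {
    use core::arch::x86_64::*;
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let r = _mm512_broadcast_f32x4(a);
    let expected = _mm512_setr_ps(
        1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
        1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    );
    assert_eq!(_mm512_cmpeq_ps_mask(r, expected), 0xffff);
}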
@@ -24139,7 +24139,7 @@ pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 { - simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24169,7 +24169,7 @@ pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d { - simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) + simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24326,66 +24326,66 @@ pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i) -> __ let b = b.as_i32x16(); let imm8: i32 = IMM8 % 16; let r: i32x16 = match imm8 { - 0 => simd_shuffle16( + 0 => simd_shuffle16!( a, b, [ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ], ), - 1 => simd_shuffle16( + 1 => simd_shuffle16!( a, b, [ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, ], ), - 2 => simd_shuffle16( + 2 => simd_shuffle16!( a, b, [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1], ), - 3 => simd_shuffle16( + 3 => simd_shuffle16!( a, b, [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2], ), - 4 => simd_shuffle16( + 4 => simd_shuffle16!( a, b, [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3], ), - 5 => simd_shuffle16( + 5 => simd_shuffle16!( a, b, [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4], ), - 6 => simd_shuffle16( + 6 => simd_shuffle16!( a, b, [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5], ), - 7 => simd_shuffle16( + 7 => simd_shuffle16!( a, b, [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6], ), - 8 => simd_shuffle16( + 8 => simd_shuffle16!( a, b, [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7], ), - 9 => simd_shuffle16( + 9 => simd_shuffle16!( a, b, [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8], ), - 10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - 11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - 12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), - 13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), - 14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]), - _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), + 10 => simd_shuffle16!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + 11 => simd_shuffle16!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + 12 => simd_shuffle16!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), + 13 => simd_shuffle16!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + 14 => simd_shuffle16!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]), + _ => simd_shuffle16!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), }; transmute(r) } @@ -24439,22 +24439,22 @@ pub unsafe fn 
_mm256_alignr_epi32(a: __m256i, b: __m256i) -> __ let b = b.as_i32x8(); let imm8: i32 = IMM8 % 16; let r: i32x8 = match imm8 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), - 2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), - 3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), - 4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), - 5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), - 6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), - 7 => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), - 8 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 9 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 10 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 11 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 12 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 13 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 14 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - _ => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle8!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), + 2 => simd_shuffle8!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), + 3 => simd_shuffle8!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), + 4 => simd_shuffle8!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), + 5 => simd_shuffle8!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), + 6 => simd_shuffle8!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), + 7 => simd_shuffle8!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), + 8 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 9 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 10 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 11 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 12 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 13 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 14 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + _ => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), }; transmute(r) } @@ -24508,14 +24508,14 @@ pub unsafe fn _mm_alignr_epi32(a: __m128i, b: __m128i) -> __m12 let b = b.as_i32x4(); let imm8: i32 = IMM8 % 8; let r: i32x4 = match imm8 { - 0 => simd_shuffle4(a, b, [4, 5, 6, 7]), - 1 => simd_shuffle4(a, b, [5, 6, 7, 0]), - 2 => simd_shuffle4(a, b, [6, 7, 0, 1]), - 3 => simd_shuffle4(a, b, [7, 0, 1, 2]), - 4 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 5 => simd_shuffle4(a, b, [1, 2, 3, 0]), - 6 => simd_shuffle4(a, b, [2, 3, 0, 1]), - _ => simd_shuffle4(a, b, [3, 0, 1, 2]), + 0 => simd_shuffle4!(a, b, [4, 5, 6, 7]), + 1 => simd_shuffle4!(a, b, [5, 6, 7, 0]), + 2 => simd_shuffle4!(a, b, [6, 7, 0, 1]), + 3 => simd_shuffle4!(a, b, [7, 0, 1, 2]), + 4 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 5 => simd_shuffle4!(a, b, [1, 2, 3, 0]), + 6 => simd_shuffle4!(a, b, [2, 3, 0, 1]), + _ => simd_shuffle4!(a, b, [3, 0, 1, 2]), }; transmute(r) } @@ -24567,14 +24567,14 @@ pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i) -> __ static_assert_imm8!(IMM8); let imm8: i32 = IMM8 % 8; let r: i64x8 = match imm8 { - 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), - 2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), - 3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), - 4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), - 5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), - 6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), - _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), + 0 => simd_shuffle8!(a, b, [8, 9, 10, 11, 12, 13, 14, 
15]), + 1 => simd_shuffle8!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), + 2 => simd_shuffle8!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), + 3 => simd_shuffle8!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), + 4 => simd_shuffle8!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), + 5 => simd_shuffle8!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), + 6 => simd_shuffle8!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), + _ => simd_shuffle8!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), }; transmute(r) } @@ -24626,14 +24626,14 @@ pub unsafe fn _mm256_alignr_epi64(a: __m256i, b: __m256i) -> __ static_assert_imm8!(IMM8); let imm8: i32 = IMM8 % 8; let r: i64x4 = match imm8 { - 0 => simd_shuffle4(a, b, [4, 5, 6, 7]), - 1 => simd_shuffle4(a, b, [5, 6, 7, 0]), - 2 => simd_shuffle4(a, b, [6, 7, 0, 1]), - 3 => simd_shuffle4(a, b, [7, 0, 1, 2]), - 4 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 5 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 6 => simd_shuffle4(a, b, [2, 3, 4, 5]), - _ => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [4, 5, 6, 7]), + 1 => simd_shuffle4!(a, b, [5, 6, 7, 0]), + 2 => simd_shuffle4!(a, b, [6, 7, 0, 1]), + 3 => simd_shuffle4!(a, b, [7, 0, 1, 2]), + 4 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 5 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 6 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + _ => simd_shuffle4!(a, b, [3, 4, 5, 6]), }; transmute(r) } @@ -24685,10 +24685,10 @@ pub unsafe fn _mm_alignr_epi64(a: __m128i, b: __m128i) -> __m12 static_assert_imm8!(IMM8); let imm8: i32 = IMM8 % 4; let r: i64x2 = match imm8 { - 0 => simd_shuffle2(a, b, [2, 3]), - 1 => simd_shuffle2(a, b, [3, 0]), - 2 => simd_shuffle2(a, b, [0, 1]), - _ => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [2, 3]), + 1 => simd_shuffle2!(a, b, [3, 0]), + 2 => simd_shuffle2!(a, b, [0, 1]), + _ => simd_shuffle2!(a, b, [1, 2]), }; transmute(r) } diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index f9a39e20a0..3e009531e1 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -350,7 +350,7 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(cmpltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3]) + simd_shuffle4!(a, cmpss(b, a, 1), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for greater than or equal. The @@ -364,7 +364,7 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(cmpless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3]) + simd_shuffle4!(a, cmpss(b, a, 2), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits @@ -420,7 +420,7 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(cmpnltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3]) + simd_shuffle4!(a, cmpss(b, a, 5), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. 
The @@ -434,7 +434,7 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(cmpnless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3]) + simd_shuffle4!(a, cmpss(b, a, 6), [4, 1, 2, 3]) } /// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of @@ -1011,10 +1011,10 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { static_assert_imm8!(MASK); - simd_shuffle4( + simd_shuffle4_param!( a, b, - [ + [ MASK as u32 & 0b11, (MASK as u32 >> 2) & 0b11, ((MASK as u32 >> 4) & 0b11) + 4, @@ -1032,7 +1032,7 @@ pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1044,7 +1044,7 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Combine higher half of `a` and `b`. The highwe half of `b` occupies the @@ -1057,7 +1057,7 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { // TODO; figure why this is a different instruction on Windows? - simd_shuffle4(a, b, [6, 7, 2, 3]) + simd_shuffle4!(a, b, [6, 7, 2, 3]) } /// Combine lower half of `a` and `b`. The lower half of `b` occupies the @@ -1069,7 +1069,7 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, b, [0, 1, 4, 5]) + simd_shuffle4!(a, b, [0, 1, 4, 5]) } /// Returns a mask of the most significant bit of each element in `a`. @@ -1201,7 +1201,7 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { let a = _mm_load_ps(p); - simd_shuffle4(a, a, [3, 2, 1, 0]) + simd_shuffle4!(a, a, [3, 2, 1, 0]) } /// Loads unaligned 64-bits of integer data from memory into new vector. 
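As a quick illustration of the MASK decoding used by _mm_shuffle_ps above: _MM_SHUFFLE(z, y, x, w) packs four 2-bit lane selectors into the immediate, the low two selecting from `a` and the high two from `b` (hence the `+ 4` offsets in the index array). A minimal sketch with a hypothetical demo function name:

// Sketch only: MASK = _MM_SHUFFLE(0, 1, 2, 3) = 0b00_01_10_11 picks
// a[3], a[2] for the low lanes and b[1], b[0] for the high lanes.
unsafe fn shuffle_ps_demo() {
    use core::arch::x86_64::*;
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
    let r = _mm_shuffle_ps(a, b, _MM_SHUFFLE(0, 1, 2, 3));
    let mut out = [0.0f32; 4];
    _mm_storeu_ps(out.as_mut_ptr(), r);
    assert_eq!(out, [4.0, 3.0, 6.0, 5.0]);
}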
@@ -1253,7 +1253,7 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { - let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]); + let b: __m128 = simd_shuffle4!(a, a, [0, 0, 0, 0]); *(p as *mut __m128) = b; } @@ -1329,7 +1329,7 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { - let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]); + let b: __m128 = simd_shuffle4!(a, a, [3, 2, 1, 0]); *(p as *mut __m128) = b; } @@ -1347,7 +1347,7 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle4(a, b, [4, 1, 2, 3]) + simd_shuffle4!(a, b, [4, 1, 2, 3]) } /// Performs a serializing operation on all store-to-memory instructions that diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index bbf98250ba..92e6bc4a3d 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -432,10 +432,10 @@ unsafe fn _mm_slli_si128_impl(a: __m128i) -> __m128i { } } let zero = _mm_set1_epi8(0).as_i8x16(); - transmute(simd_shuffle16::( + transmute::(simd_shuffle16_param!( zero, a.as_i8x16(), - [ + [ mask(IMM8, 0), mask(IMM8, 1), mask(IMM8, 2), @@ -635,10 +635,10 @@ unsafe fn _mm_srli_si128_impl(a: __m128i) -> __m128i { } } let zero = _mm_set1_epi8(0).as_i8x16(); - let x: i8x16 = simd_shuffle16( + let x: i8x16 = simd_shuffle16_param!( a.as_i8x16(), zero, - [ + [ mask(IMM8, 0), mask(IMM8, 1), mask(IMM8, 2), @@ -895,7 +895,7 @@ pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { let a = a.as_i32x4(); - simd_cast::(simd_shuffle2(a, a, [0, 1])) + simd_cast::(simd_shuffle2!(a, a, [0, 1])) } /// Returns `a` with its lower element replaced by `b` after converting it to @@ -1303,7 +1303,7 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); - let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]); + let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]); transmute(r) } @@ -1391,10 +1391,10 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i32x4(); - let x: i32x4 = simd_shuffle4( + let x: i32x4 = simd_shuffle4_param!( a, a, - [ + [ IMM8 as u32 & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -1419,10 +1419,10 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle8( + let x: i16x8 = simd_shuffle8_param!( a, a, - [ + [ 0, 1, 2, @@ -1451,10 +1451,10 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle8( + let x: i16x8 = simd_shuffle8_param!( a, a, - [ + [ IMM8 as u32 & 0b11, (IMM8 as u32 >> 2) & 0b11, (IMM8 as u32 >> 4) & 0b11, @@ -1476,7 +1476,7 
@@ pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(punpckhbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle16( + transmute::(simd_shuffle16!( a.as_i8x16(), b.as_i8x16(), [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], @@ -1491,7 +1491,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(punpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); + let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); transmute::(x) } @@ -1503,7 +1503,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) + transmute::(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } /// Unpacks and interleave 64-bit integers from the high half of `a` and `b`. @@ -1514,7 +1514,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) + transmute::(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3])) } /// Unpacks and interleave 8-bit integers from the low half of `a` and `b`. @@ -1525,7 +1525,7 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(punpcklbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle16( + transmute::(simd_shuffle16!( a.as_i8x16(), b.as_i8x16(), [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], @@ -1540,7 +1540,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(punpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); + let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); transmute::(x) } @@ -1552,7 +1552,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) + transmute::(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } /// Unpacks and interleave 64-bit integers from the low half of `a` and `b`. 
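To illustrate the interleave encoded by index arrays such as [0, 8, 1, 9, 2, 10, 3, 11] above: indices below the vector length select from `a`, the rest from `b`, so the low halves are woven together element by element. A minimal sketch using long-stable SSE2 intrinsics (demo name hypothetical):

// Sketch only: _mm_unpacklo_epi16 interleaves the low four 16-bit lanes
// of `a` and `b` as a0, b0, a1, b1, a2, b2, a3, b3.
unsafe fn unpacklo_epi16_demo() {
    use core::arch::x86_64::*;
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm_unpacklo_epi16(a, b);
    let expected = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, expected)), 0xffff);
}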
@@ -1563,7 +1563,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) + transmute::(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2])) } /// Returns a new vector with the low element of `a` replaced by the sum of the @@ -2519,7 +2519,7 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { - let b: __m128d = simd_shuffle2(a, a, [0, 0]); + let b: __m128d = simd_shuffle2!(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; } @@ -2533,7 +2533,7 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { - let b: __m128d = simd_shuffle2(a, a, [0, 0]); + let b: __m128d = simd_shuffle2!(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; } @@ -2548,7 +2548,7 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { - let b: __m128d = simd_shuffle2(a, a, [1, 0]); + let b: __m128d = simd_shuffle2!(a, a, [1, 0]); *(mem_addr as *mut __m128d) = b; } @@ -2612,7 +2612,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { let a = _mm_load_pd(mem_addr); - simd_shuffle2(a, a, [1, 0]) + simd_shuffle2!(a, a, [1, 0]) } /// Loads 128-bits (composed of 2 packed double-precision (64-bit) @@ -2653,7 +2653,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_imm8!(MASK); - simd_shuffle2(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) + simd_shuffle2_param!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } /// Constructs a 128-bit floating-point vector of `[2 x double]`. 
The lower @@ -2777,7 +2777,7 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// The resulting `__m128d` element is composed by the high-order values of @@ -2792,7 +2792,7 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/sse3.rs b/crates/core_arch/src/x86/sse3.rs index 977de1dc17..ab0dd38fed 100644 --- a/crates/core_arch/src/x86/sse3.rs +++ b/crates/core_arch/src/x86/sse3.rs @@ -106,7 +106,7 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { #[cfg_attr(test, assert_instr(movddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { - simd_shuffle2(a, a, [0, 0]) + simd_shuffle2!(a, a, [0, 0]) } /// Loads a double-precision (64-bit) floating-point element from memory @@ -130,7 +130,7 @@ pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d { #[cfg_attr(test, assert_instr(movshdup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { - simd_shuffle4(a, a, [1, 1, 3, 3]) + simd_shuffle4!(a, a, [1, 1, 3, 3]) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements @@ -142,7 +142,7 @@ pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { #[cfg_attr(test, assert_instr(movsldup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 { - simd_shuffle4(a, a, [0, 0, 2, 2]) + simd_shuffle4!(a, a, [0, 0, 2, 2]) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 6bc0825d10..6e607d4832 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -379,7 +379,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { let a = a.as_i8x16(); - let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let a: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute(simd_cast::<_, i16x8>(a)) } @@ -392,7 +392,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { let a = a.as_i8x16(); - let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]); + let a: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } @@ -406,7 +406,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { let a = a.as_i8x16(); - let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]); + let a: i8x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } @@ -419,7 +419,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { let a = a.as_i16x8(); - let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]); 
+ let a: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } @@ -432,7 +432,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { let a = a.as_i16x8(); - let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]); + let a: i16x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } @@ -445,7 +445,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { let a = a.as_i32x4(); - let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]); + let a: i32x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } @@ -458,7 +458,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { let a = a.as_u8x16(); - let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let a: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute(simd_cast::<_, i16x8>(a)) } @@ -471,7 +471,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { let a = a.as_u8x16(); - let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]); + let a: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } @@ -484,7 +484,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { let a = a.as_u8x16(); - let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]); + let a: u8x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } @@ -498,7 +498,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { let a = a.as_u16x8(); - let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]); + let a: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } @@ -512,7 +512,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { let a = a.as_u16x8(); - let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]); + let a: u16x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } @@ -526,7 +526,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { let a = a.as_u32x4(); - let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]); + let a: u32x2 = simd_shuffle2!(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 2cd92b3d65..9b815dcd6c 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -113,10 +113,10 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128 shift + i } } - let r: i8x16 = simd_shuffle16( + let r: i8x16 = simd_shuffle16_param!( b.as_i8x16(), a.as_i8x16(), - [ + [ mask(IMM8 as u32, 0), mask(IMM8 as u32, 1), mask(IMM8 as u32, 2), diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 586a4fbe37..bdf72621eb 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -194,8 
+194,8 @@ generate int32x2_t:int32x2_t:int64x2_t /// Unsigned Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle8, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] -multi_fn = simd_shuffle8, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, {vabd_u8, c, d} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 @@ -207,8 +207,8 @@ generate uint8x16_t:uint8x16_t:uint16x8_t /// Unsigned Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle4, c:uint16x4_t, a, a, [4, 5, 6, 7] -multi_fn = simd_shuffle4, d:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, {vabd_u16, c, d} a = 1, 2, 3, 4, 8, 9, 11, 12 b = 10, 10, 10, 10, 10, 10, 10, 10 @@ -220,8 +220,8 @@ generate uint16x8_t:uint16x8_t:uint32x4_t /// Unsigned Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle2, c:uint32x2_t, a, a, [2, 3] -multi_fn = simd_shuffle2, d:uint32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3] +multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3] multi_fn = simd_cast, {vabd_u32, c, d} a = 1, 2, 3, 4 b = 10, 10, 10, 10 @@ -233,8 +233,8 @@ generate uint32x4_t:uint32x4_t:uint64x2_t /// Signed Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle8, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] -multi_fn = simd_shuffle8, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, e:uint8x8_t, {vabd_s8, c, d} multi_fn = simd_cast, e a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -247,8 +247,8 @@ generate int8x16_t:int8x16_t:int16x8_t /// Signed Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle4, c:int16x4_t, a, a, [4, 5, 6, 7] -multi_fn = simd_shuffle4, d:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, e:uint16x4_t, {vabd_s16, c, d} multi_fn = simd_cast, e a = 1, 2, 3, 4, 9, 10, 11, 12 @@ -261,8 +261,8 @@ generate int16x8_t:int16x8_t:int32x4_t /// Signed Absolute difference Long name = vabdl_high no-q -multi_fn = simd_shuffle2, c:int32x2_t, a, a, [2, 3] -multi_fn = simd_shuffle2, d:int32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3] +multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3] multi_fn = simd_cast, e:uint32x2_t, {vabd_s32, c, d} multi_fn = simd_cast, e a = 1, 2, 3, 4 @@ -897,7 +897,7 @@ generate float32x2_t:float64x2_t /// Floating-point convert to higher precision long name = vcvt_high noq-double-suffixes -multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3] +multi_fn = simd_shuffle2!, b:float32x2_t, a, a, [2, 3] multi_fn = simd_cast, b a = -1.2, 1.2, 2.3, 3.4 validate 2.3f32 as f64, 3.4f32 as f64 @@ -918,7 +918,7 @@ generate float64x2_t:float32x2_t /// Floating-point convert to lower precision narrow name = vcvt_high noq-double-suffixes -multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3] +multi_fn = simd_shuffle4!, a, {simd_cast, b}, [0, 1, 2, 3] a = -1.2, 1.2 b = -2.3, 3.4 validate 
-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32 @@ -939,7 +939,7 @@ generate float64x2_t:float32x2_t /// Floating-point convert to lower precision narrow, rounding to odd name = vcvtx_high noq-double-suffixes -multi_fn = simd_shuffle4, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3] +multi_fn = simd_shuffle4!, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3] a = -1.0, 2.0 b = -3.0, 4.0 validate -1.0, 2.0, -3.0, 4.0 @@ -2629,7 +2629,7 @@ generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint /// Signed Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle8, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 12, 13, 14, 15, 16 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 @@ -2641,7 +2641,7 @@ generate int16x8_t:int8x16_t:int16x8_t /// Signed Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle4, c:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11 b = 0, 1, 2, 3, 8, 9, 10, 11 @@ -2653,7 +2653,7 @@ generate int32x4_t:int16x8_t:int32x4_t /// Signed Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle2, c:int32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, c:int32x2_t, b, b, [2, 3] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9 b = 6, 7, 8, 9 @@ -2665,7 +2665,7 @@ generate int64x2_t:int32x4_t:int64x2_t /// Unsigned Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle8, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11, 12, 13, 14, 15 b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 @@ -2677,7 +2677,7 @@ generate uint16x8_t:uint8x16_t:uint16x8_t /// Unsigned Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle4, c:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9, 10, 11 b = 0, 1, 2, 3, 8, 9, 10, 11 @@ -2689,7 +2689,7 @@ generate uint32x4_t:uint16x8_t:uint32x4_t /// Unsigned Subtract Wide name = vsubw_high no-q -multi_fn = simd_shuffle2, c:uint32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, c:uint32x2_t, b, b, [2, 3] multi_fn = simd_sub, a, {simd_cast, c} a = 8, 9 b = 6, 7, 8, 9 @@ -2731,9 +2731,9 @@ generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint3 /// Signed Subtract Long name = vsubl_high no-q -multi_fn = simd_shuffle8, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle8, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -2747,9 +2747,9 @@ generate int8x16_t:int8x16_t:int16x8_t /// Signed Subtract Long name = vsubl_high no-q -multi_fn = simd_shuffle4, c:int16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:int16x4_t, a, a, [4, 5, 6, 7] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle4, e:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, e:int16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -2763,9 +2763,9 @@ generate int16x8_t:int16x8_t:int32x4_t /// Signed Subtract Long name = vsubl_high no-q -multi_fn = 
simd_shuffle2, c:int32x2_t, a, a, [2, 3] +multi_fn = simd_shuffle2!, c:int32x2_t, a, a, [2, 3] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle2, e:int32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, e:int32x2_t, b, b, [2, 3] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -2779,9 +2779,9 @@ generate int32x4_t:int32x4_t:int64x2_t /// Unsigned Subtract Long name = vsubl_high no-q -multi_fn = simd_shuffle8, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle8, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -2795,9 +2795,9 @@ generate uint8x16_t:uint8x16_t:uint16x8_t /// Unsigned Subtract Long name = vsubl_high no-q -multi_fn = simd_shuffle4, c:uint16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, c:uint16x4_t, a, a, [4, 5, 6, 7] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle4, e:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, e:uint16x4_t, b, b, [4, 5, 6, 7] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -2811,9 +2811,9 @@ generate uint16x8_t:uint16x8_t:uint32x4_t /// Unsigned Subtract Long name = vsubl_high no-q -multi_fn = simd_shuffle2, c:uint32x2_t, a, a, [2, 3] +multi_fn = simd_shuffle2!, c:uint32x2_t, a, a, [2, 3] multi_fn = simd_cast, d:out_t, c -multi_fn = simd_shuffle2, e:uint32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, e:uint32x2_t, b, b, [2, 3] multi_fn = simd_cast, f:out_t, e multi_fn = simd_sub, d, f @@ -4717,8 +4717,8 @@ generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16 /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle8, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] -multi_fn = simd_shuffle8, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, d:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, e:uint8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = vabd_u8, d, e, f:uint8x8_t multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12, 13, 14, 15, 16 @@ -4732,8 +4732,8 @@ generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle4, d:uint16x4_t, b, b, [4, 5, 6, 7] -multi_fn = simd_shuffle4, e:uint16x4_t, c, c, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, d:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, e:uint16x4_t, c, c, [4, 5, 6, 7] multi_fn = vabd_u16, d, e, f:uint16x4_t multi_fn = simd_add, a, {simd_cast, f} a = 9, 10, 11, 12 @@ -4747,8 +4747,8 @@ generate uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t /// Unsigned Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle2, d:uint32x2_t, b, b, [2, 3] -multi_fn = simd_shuffle2, e:uint32x2_t, c, c, [2, 3] +multi_fn = simd_shuffle2!, d:uint32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, e:uint32x2_t, c, c, [2, 3] multi_fn = vabd_u32, d, e, f:uint32x2_t multi_fn = simd_add, a, {simd_cast, f} a = 15, 16 @@ -4808,8 +4808,8 @@ generate int64x2_t:int32x2_t:int32x2_t:int64x2_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle8, d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] -multi_fn = simd_shuffle8, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, 
d:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_shuffle8!, e:int8x8_t, c, c, [8, 9, 10, 11, 12, 13, 14, 15] multi_fn = vabd_s8, d, e, f:int8x8_t multi_fn = simd_cast, f:uint8x8_t, f multi_fn = simd_add, a, {simd_cast, f} @@ -4824,8 +4824,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle4, d:int16x4_t, b, b, [4, 5, 6, 7] -multi_fn = simd_shuffle4, e:int16x4_t, c, c, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, d:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_shuffle4!, e:int16x4_t, c, c, [4, 5, 6, 7] multi_fn = vabd_s16, d, e, f:int16x4_t multi_fn = simd_cast, f:uint16x4_t, f multi_fn = simd_add, a, {simd_cast, f} @@ -4840,8 +4840,8 @@ generate int32x4_t:int16x8_t:int16x8_t:int32x4_t /// Signed Absolute difference and Accumulate Long name = vabal_high no-q -multi_fn = simd_shuffle2, d:int32x2_t, b, b, [2, 3] -multi_fn = simd_shuffle2, e:int32x2_t, c, c, [2, 3] +multi_fn = simd_shuffle2!, d:int32x2_t, b, b, [2, 3] +multi_fn = simd_shuffle2!, e:int32x2_t, c, c, [2, 3] multi_fn = vabd_s32, d, e, f:int32x2_t multi_fn = simd_cast, f:uint32x2_t, f multi_fn = simd_add, a, {simd_cast, f} From 2d0a7518383720defe14754e726f5d48b2f41603 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 11 May 2021 11:27:43 +0200 Subject: [PATCH 2/5] fmt --- crates/core_arch/src/x86/avx2.rs | 3 ++- crates/core_arch/src/x86/avx512f.rs | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 108ba0f0bb..295737dbbb 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -1712,7 +1712,8 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i) - static_assert_imm1!(IMM1); let a = a.as_i64x4(); let b = _mm256_castsi128_si256(b).as_i64x4(); - let dst: i64x4 = simd_shuffle4_param!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); + let dst: i64x4 = + simd_shuffle4_param!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); transmute(dst) } diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 18c6b296d7..3d750cd84d 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -24329,16 +24329,12 @@ pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i) -> __ 0 => simd_shuffle16!( a, b, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - ], + [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,], ), 1 => simd_shuffle16!( a, b, - [ - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, - ], + [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,], ), 2 => simd_shuffle16!( a, From d6f09aa0374a09c49bc1d926fdf731524d10f043 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 11 May 2021 13:26:24 +0200 Subject: [PATCH 3/5] unify shuffle and shuffle_param macros --- crates/core_arch/src/macros.rs | 100 ++++++++++----------------- crates/core_arch/src/x86/avx.rs | 28 ++++---- crates/core_arch/src/x86/avx2.rs | 22 +++--- crates/core_arch/src/x86/avx512bw.rs | 6 +- crates/core_arch/src/x86/avx512f.rs | 34 ++++----- crates/core_arch/src/x86/sse.rs | 2 +- crates/core_arch/src/x86/sse2.rs | 12 ++-- crates/core_arch/src/x86/ssse3.rs | 2 +- 8 files changed, 91 insertions(+), 115 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index ce59a0ab8b..108760e867 100644 --- a/crates/core_arch/src/macros.rs +++ 
b/crates/core_arch/src/macros.rs @@ -95,6 +95,14 @@ macro_rules! types { #[allow(unused_macros)] macro_rules! simd_shuffle2 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 2] = $idx; + } + + simd_shuffle2($x, $y, ConstParam::<$($imm),+>::IDX) + }}; ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 2] = $idx; simd_shuffle2($x, $y, IDX) @@ -102,19 +110,15 @@ macro_rules! simd_shuffle2 { } #[allow(unused_macros)] -macro_rules! simd_shuffle2_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 2] = $idx; +macro_rules! simd_shuffle4 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 4] = $idx; } - simd_shuffle2($x, $y, ConstParam::<$imm>::IDX) + simd_shuffle4($x, $y, ConstParam::<$($imm),+>::IDX) }}; -} - -#[allow(unused_macros)] -macro_rules! simd_shuffle4 { ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 4] = $idx; simd_shuffle4($x, $y, IDX) @@ -122,19 +126,15 @@ macro_rules! simd_shuffle4 { } #[allow(unused_macros)] -macro_rules! simd_shuffle4_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 4] = $idx; +macro_rules! simd_shuffle8 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 8] = $idx; } - simd_shuffle4($x, $y, ConstParam::<$imm>::IDX) + simd_shuffle8($x, $y, ConstParam::<$($imm),+>::IDX) }}; -} - -#[allow(unused_macros)] -macro_rules! simd_shuffle8 { ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 8] = $idx; simd_shuffle8($x, $y, IDX) @@ -142,19 +142,15 @@ macro_rules! simd_shuffle8 { } #[allow(unused_macros)] -macro_rules! simd_shuffle8_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 8] = $idx; +macro_rules! simd_shuffle16 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 16] = $idx; } - simd_shuffle8($x, $y, ConstParam::<$imm>::IDX) + simd_shuffle16($x, $y, ConstParam::<$($imm),+>::IDX) }}; -} - -#[allow(unused_macros)] -macro_rules! simd_shuffle16 { ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 16] = $idx; simd_shuffle16($x, $y, IDX) @@ -162,19 +158,15 @@ macro_rules! simd_shuffle16 { } #[allow(unused_macros)] -macro_rules! simd_shuffle16_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 16] = $idx; +macro_rules! simd_shuffle32 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 32] = $idx; } - simd_shuffle16($x, $y, ConstParam::<$imm>::IDX) + simd_shuffle32($x, $y, ConstParam::<$($imm),+>::IDX) }}; -} - -#[allow(unused_macros)] -macro_rules! simd_shuffle32 { ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 32] = $idx; simd_shuffle32($x, $y, IDX) @@ -182,33 +174,17 @@ macro_rules! simd_shuffle32 { } #[allow(unused_macros)] -macro_rules! 
simd_shuffle32_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 32] = $idx; +macro_rules! simd_shuffle64 { + ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{ + struct ConstParam<$(const $imm: $ty),+>; + impl<$(const $imm: $ty),+> ConstParam<$($imm),+> { + const IDX: [u32; 64] = $idx; } - simd_shuffle32($x, $y, ConstParam::<$imm>::IDX) + simd_shuffle64($x, $y, ConstParam::<$($imm),+>::IDX) }}; -} - -#[allow(unused_macros)] -macro_rules! simd_shuffle64 { ($x:expr, $y:expr, $idx:expr $(,)?) => {{ const IDX: [u32; 64] = $idx; simd_shuffle64($x, $y, IDX) }}; } - -#[allow(unused_macros)] -macro_rules! simd_shuffle64_param { - ($x:expr, $y:expr, $idx:expr $(,)?) => {{ - struct ConstParam; - impl ConstParam<$imm> { - const IDX: [u32; 64] = $idx; - } - - simd_shuffle64($x, $y, ConstParam::<$imm>::IDX) - }}; -} diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 320073b016..7dfc928bd4 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -118,7 +118,7 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_imm8!(MASK); - simd_shuffle4_param!( + simd_shuffle4!( a, b, [ @@ -141,7 +141,7 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m2 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { static_assert_imm8!(MASK); - simd_shuffle8_param!( + simd_shuffle8!( a, b, [ @@ -463,7 +463,7 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_imm4!(IMM4); - simd_shuffle4_param!( + simd_shuffle4!( a, b, [ @@ -486,7 +486,7 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256 #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { static_assert_imm8!(IMM8); - simd_shuffle8_param!( + simd_shuffle8!( a, b, [ @@ -930,7 +930,7 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { static_assert_imm1!(IMM1); - simd_shuffle4_param!( + simd_shuffle4!( a, _mm256_undefined_ps(), [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], @@ -951,7 +951,7 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d { static_assert_imm1!(IMM1); - simd_shuffle2_param!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) + simd_shuffle2!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) } /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. 
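The unified simd_shuffleN! macros in macros.rs above accept two call forms; a minimal illustrative sketch, reusing the IMM1 const generic and the index table from the _mm256_extractf128_* conversions nearby (not part of the patch itself):

    // Indices are a plain constant expression: the second macro arm hoists them into a `const` item.
    let lo: i64x2 = simd_shuffle2!(a, b, [0, 1]);
    // Indices depend on the caller's const generic: it is named explicitly so the macro's hidden
    // `ConstParam` struct can carry it and the index array is still evaluated as a constant.
    let hi: i64x2 = simd_shuffle2!(a, b, <const IMM1: i32> [[0, 1], [2, 3]][IMM1 as usize]);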
@@ -967,7 +967,7 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_si256(a: __m256i) -> __m128i { static_assert_imm1!(IMM1); - let dst: i64x2 = simd_shuffle2_param!( + let dst: i64x2 = simd_shuffle2!( a.as_i64x4(), _mm256_undefined_si256().as_i64x4(), [[0, 1], [2, 3]][IMM1 as usize], @@ -1033,7 +1033,7 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { static_assert_imm8!(IMM8); - simd_shuffle8_param!( + simd_shuffle8!( a, _mm256_undefined_ps(), [ @@ -1060,7 +1060,7 @@ pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_ps(a: __m128) -> __m128 { static_assert_imm8!(IMM8); - simd_shuffle4_param!( + simd_shuffle4!( a, _mm_undefined_ps(), [ @@ -1107,7 +1107,7 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_pd(a: __m256d) -> __m256d { static_assert_imm4!(IMM4); - simd_shuffle4_param!( + simd_shuffle4!( a, _mm256_undefined_pd(), [ @@ -1130,7 +1130,7 @@ pub unsafe fn _mm256_permute_pd(a: __m256d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_pd(a: __m128d) -> __m128d { static_assert_imm2!(IMM2); - simd_shuffle2_param!( + simd_shuffle2!( a, _mm_undefined_pd(), [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], @@ -1257,7 +1257,7 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m256 { static_assert_imm1!(IMM1); - simd_shuffle8_param!( + simd_shuffle8!( a, _mm256_castps128_ps256(b), [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], @@ -1279,7 +1279,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> __m256d { static_assert_imm1!(IMM1); - simd_shuffle4_param!( + simd_shuffle4!( a, _mm256_castpd128_pd256(b), [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], @@ -1300,7 +1300,7 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> _ #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i) -> __m256i { static_assert_imm1!(IMM1); - let dst: i64x4 = simd_shuffle4_param!( + let dst: i64x4 = simd_shuffle4!( a.as_i64x4(), _mm256_castsi128_si256(b).as_i64x4(), [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 295737dbbb..e0c7fcfab6 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -370,7 +370,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128 static_assert_imm4!(IMM4); let a = a.as_i32x4(); let b = b.as_i32x4(); - let r: i32x4 = simd_shuffle4_param!( + let r: i32x4 = simd_shuffle4!( a, b, [ @@ -395,7 +395,7 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m static_assert_imm8!(IMM8); let a = a.as_i32x8(); let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle8_param!( + let r: i32x8 = simd_shuffle8!( a, b, [ @@ -425,7 +425,7 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m let a = a.as_i16x16(); let b = b.as_i16x16(); - let r: i16x16 = simd_shuffle16_param!( + let r: i16x16 = 
simd_shuffle16!( a, b, [ @@ -890,7 +890,7 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i) -> __m128i { static_assert_imm1!(IMM1); let a = a.as_i64x4(); let b = _mm256_undefined_si256().as_i64x4(); - let dst: i64x2 = simd_shuffle2_param!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); + let dst: i64x2 = simd_shuffle2!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); transmute(dst) } @@ -1713,7 +1713,7 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i) - let a = a.as_i64x4(); let b = _mm256_castsi128_si256(b).as_i64x4(); let dst: i64x4 = - simd_shuffle4_param!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); + simd_shuffle4!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); transmute(dst) } @@ -2202,7 +2202,7 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { pub unsafe fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - let r: i64x4 = simd_shuffle4_param!( + let r: i64x4 = simd_shuffle4!( a.as_i64x4(), zero, [ @@ -2239,7 +2239,7 @@ pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { static_assert_imm8!(IMM8); - simd_shuffle4_param!( + simd_shuffle4!( a, _mm256_undefined_pd(), [ @@ -2352,7 +2352,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { static_assert_imm8!(MASK); - let r: i32x8 = simd_shuffle8_param!( + let r: i32x8 = simd_shuffle8!( a.as_i32x8(), a.as_i32x8(), [ @@ -2382,7 +2382,7 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle16_param!( + let r: i16x16 = simd_shuffle16!( a, a, [ @@ -2420,7 +2420,7 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { pub unsafe fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle16_param!( + let r: i16x16 = simd_shuffle16!( a, a, [ @@ -2587,7 +2587,7 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { static_assert_imm8!(IMM8); let a = a.as_i8x32(); let zero = _mm256_setzero_si256().as_i8x32(); - let r: i8x32 = simd_shuffle32_param!( + let r: i8x32 = simd_shuffle32!( zero, a, [ diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index a66d1c2934..8c2c9a2058 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7133,7 +7133,7 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { pub unsafe fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle32_param!( + let r: i16x32 = simd_shuffle32!( a, a, [ @@ -7277,7 +7277,7 @@ pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i pub unsafe fn _mm512_shufflehi_epi16(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle32_param!( + let r: i16x32 = simd_shuffle32!( a, a, [ @@ -8875,7 +8875,7 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { static_assert_imm8!(IMM8); let a = a.as_i8x64(); let zero = _mm512_setzero_si512().as_i8x64(); - let r: i8x64 = simd_shuffle64_param!( + let r: i8x64 = simd_shuffle64!( zero, a, [ diff --git 
a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 3d750cd84d..766acf46f7 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -19215,7 +19215,7 @@ pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _ #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permute_ps(a: __m512) -> __m512 { static_assert_imm8!(MASK); - simd_shuffle16_param!( + simd_shuffle16!( a, a, [ @@ -19333,7 +19333,7 @@ pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128) -> _ #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permute_pd(a: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8_param!( + simd_shuffle8!( a, a, [ @@ -19451,7 +19451,7 @@ pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d) -> #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permutex_epi64(a: __m512i) -> __m512i { static_assert_imm8!(MASK); - simd_shuffle8_param!( + simd_shuffle8!( a, a, [ @@ -19507,7 +19507,7 @@ pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m51 #[rustc_legacy_const_generics(1)] pub unsafe fn _mm256_permutex_epi64(a: __m256i) -> __m256i { static_assert_imm8!(MASK); - simd_shuffle4_param!( + simd_shuffle4!( a, a, [ @@ -19559,7 +19559,7 @@ pub unsafe fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m25 #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_permutex_pd(a: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8_param!( + simd_shuffle8!( a, a, [ @@ -19613,7 +19613,7 @@ pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d) #[rustc_legacy_const_generics(1)] pub unsafe fn _mm256_permutex_pd(a: __m256d) -> __m256d { static_assert_imm8!(MASK); - simd_shuffle4_param!( + simd_shuffle4!( a, a, [ @@ -20867,7 +20867,7 @@ pub unsafe fn _mm_mask2_permutex2var_pd( #[rustc_legacy_const_generics(1)] pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { static_assert_imm8!(MASK); - let r: i32x16 = simd_shuffle16_param!( + let r: i32x16 = simd_shuffle16!( a.as_i32x16(), a.as_i32x16(), [ @@ -21003,7 +21003,7 @@ pub unsafe fn _mm_maskz_shuffle_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 { static_assert_imm8!(MASK); - simd_shuffle16_param!( + simd_shuffle16!( a, b, [ @@ -21140,7 +21140,7 @@ pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: _ #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m512d { static_assert_imm8!(MASK); - simd_shuffle8_param!( + simd_shuffle8!( a, b, [ @@ -21275,7 +21275,7 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> _ static_assert_imm8!(MASK); let a = a.as_i32x16(); let b = b.as_i32x16(); - let r: i32x16 = simd_shuffle16_param!( + let r: i32x16 = simd_shuffle16!( a, b, [ @@ -21347,7 +21347,7 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> _ static_assert_imm8!(MASK); let a = a.as_i32x8(); let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle8_param!( + let r: i32x8 = simd_shuffle8!( a, b, [ @@ -21411,7 +21411,7 @@ pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> _ static_assert_imm8!(MASK); let a = a.as_i64x8(); let b = b.as_i64x8(); - let r: i64x8 = simd_shuffle8_param!( + let r: i64x8 = simd_shuffle8!( a, b, [ @@ -21475,7 +21475,7 @@ pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> _ static_assert_imm8!(MASK); let a = a.as_i64x4(); let b = b.as_i64x4(); - let r: i64x4 = simd_shuffle4_param!( + let r: i64x4 = simd_shuffle4!( a, b, [ @@ -21535,7 +21535,7 @@ pub 
unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m static_assert_imm8!(MASK); let a = a.as_f32x16(); let b = b.as_f32x16(); - let r: f32x16 = simd_shuffle16_param!( + let r: f32x16 = simd_shuffle16!( a, b, [ @@ -21607,7 +21607,7 @@ pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m static_assert_imm8!(MASK); let a = a.as_f32x8(); let b = b.as_f32x8(); - let r: f32x8 = simd_shuffle8_param!( + let r: f32x8 = simd_shuffle8!( a, b, [ @@ -21671,7 +21671,7 @@ pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> _ static_assert_imm8!(MASK); let a = a.as_f64x8(); let b = b.as_f64x8(); - let r: f64x8 = simd_shuffle8_param!( + let r: f64x8 = simd_shuffle8!( a, b, [ @@ -21735,7 +21735,7 @@ pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> _ static_assert_imm8!(MASK); let a = a.as_f64x4(); let b = b.as_f64x4(); - let r: f64x4 = simd_shuffle4_param!( + let r: f64x4 = simd_shuffle4!( a, b, [ diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 3e009531e1..a75a31ffe4 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1011,7 +1011,7 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { static_assert_imm8!(MASK); - simd_shuffle4_param!( + simd_shuffle4!( a, b, [ diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 92e6bc4a3d..2f3e719668 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -432,7 +432,7 @@ unsafe fn _mm_slli_si128_impl(a: __m128i) -> __m128i { } } let zero = _mm_set1_epi8(0).as_i8x16(); - transmute::(simd_shuffle16_param!( + transmute::(simd_shuffle16!( zero, a.as_i8x16(), [ @@ -635,7 +635,7 @@ unsafe fn _mm_srli_si128_impl(a: __m128i) -> __m128i { } } let zero = _mm_set1_epi8(0).as_i8x16(); - let x: i8x16 = simd_shuffle16_param!( + let x: i8x16 = simd_shuffle16!( a.as_i8x16(), zero, [ @@ -1391,7 +1391,7 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i32x4(); - let x: i32x4 = simd_shuffle4_param!( + let x: i32x4 = simd_shuffle4!( a, a, [ @@ -1419,7 +1419,7 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle8_param!( + let x: i16x8 = simd_shuffle8!( a, a, [ @@ -1451,7 +1451,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { static_assert_imm8!(IMM8); let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle8_param!( + let x: i16x8 = simd_shuffle8!( a, a, [ @@ -2653,7 +2653,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_imm8!(MASK); - simd_shuffle2_param!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) + simd_shuffle2!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } /// Constructs a 128-bit floating-point vector of `[2 x double]`. 
The lower diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 9b815dcd6c..4beb496b6b 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -113,7 +113,7 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128 shift + i } } - let r: i8x16 = simd_shuffle16_param!( + let r: i8x16 = simd_shuffle16!( b.as_i8x16(), a.as_i8x16(), [ From 3fa765c4942e7d6ed3505966a0a18a73dfacfedc Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 11 May 2021 13:30:58 +0200 Subject: [PATCH 4/5] fix generated neon files --- .../core_arch/src/aarch64/neon/generated.rs | 1116 ++++++++--------- .../src/arm_shared/neon/generated.rs | 540 ++++---- crates/stdarch-gen/neon.spec | 154 +-- crates/stdarch-gen/src/main.rs | 42 +- 4 files changed, 928 insertions(+), 924 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index c02b59be7f..375d221499 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1171,14 +1171,14 @@ pub unsafe fn vcopy_lane_s8(a: int8x8_t, b: static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1192,22 +1192,22 @@ pub unsafe fn vcopyq_laneq_s8(a: int8x16_t, static_assert_imm4!(LANE1); static_assert_imm4!(LANE2); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => 
simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1221,10 +1221,10 @@ pub unsafe fn vcopy_lane_s16(a: int16x4_t, b static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1238,14 +1238,14 @@ pub unsafe fn vcopyq_laneq_s16(a: int16x8_t, static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => 
simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1259,8 +1259,8 @@ pub unsafe fn vcopy_lane_s32(a: int32x2_t, b static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1274,10 +1274,10 @@ pub unsafe fn vcopyq_laneq_s32(a: int32x4_t, static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1291,8 +1291,8 @@ pub unsafe fn vcopyq_laneq_s64(a: int64x2_t, static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1306,14 +1306,14 @@ pub unsafe fn vcopy_lane_u8(a: uint8x8_t, b: static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1327,22 +1327,22 @@ pub unsafe fn vcopyq_laneq_u8(a: uint8x16_t, static_assert_imm4!(LANE1); static_assert_imm4!(LANE2); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1356,10 +1356,10 @@ pub unsafe fn vcopy_lane_u16(a: uint16x4_t, static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => 
simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1373,14 +1373,14 @@ pub unsafe fn vcopyq_laneq_u16(a: uint16x8_t static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1394,8 +1394,8 @@ pub unsafe fn vcopy_lane_u32(a: uint32x2_t, static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1409,10 +1409,10 @@ pub unsafe fn vcopyq_laneq_u32(a: uint32x4_t static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1426,8 +1426,8 @@ pub unsafe fn vcopyq_laneq_u64(a: uint64x2_t static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1441,14 +1441,14 @@ pub unsafe fn vcopy_lane_p8(a: poly8x8_t, b: static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + 
LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1462,22 +1462,22 @@ pub unsafe fn vcopyq_laneq_p8(a: poly8x16_t, static_assert_imm4!(LANE1); static_assert_imm4!(LANE2); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => 
simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1491,10 +1491,10 @@ pub unsafe fn vcopy_lane_p16(a: poly16x4_t, static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1508,14 +1508,14 @@ pub unsafe fn vcopyq_laneq_p16(a: poly16x8_t static_assert_imm3!(LANE1); static_assert_imm3!(LANE2); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1529,8 +1529,8 @@ pub unsafe fn vcopyq_laneq_p64(a: poly64x2_t static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1544,8 +1544,8 @@ pub unsafe fn vcopy_lane_f32(a: float32x2_t, static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1559,10 +1559,10 @@ pub unsafe fn vcopyq_laneq_f32(a: float32x4_ static_assert_imm2!(LANE1); static_assert_imm2!(LANE2); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => 
simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1576,8 +1576,8 @@ pub unsafe fn vcopyq_laneq_f64(a: float64x2_ static_assert_imm1!(LANE1); static_assert_imm1!(LANE2); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1590,16 +1590,16 @@ pub unsafe fn vcopyq_laneq_f64(a: float64x2_ pub unsafe fn vcopy_laneq_s8(a: int8x8_t, b: int8x16_t) -> int8x8_t { static_assert_imm3!(LANE1); static_assert_imm4!(LANE2); - let a: int8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let a: int8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1612,12 +1612,12 @@ pub unsafe fn vcopy_laneq_s8(a: int8x8_t, b: pub unsafe fn vcopy_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { static_assert_imm2!(LANE1); static_assert_imm3!(LANE2); - let a: int16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let a: int16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1630,10 +1630,10 @@ pub unsafe fn vcopy_laneq_s16(a: int16x4_t, pub unsafe fn vcopy_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { static_assert_imm1!(LANE1); static_assert_imm2!(LANE2); - let a: int32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + let a: int32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]); 
match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1646,16 +1646,16 @@ pub unsafe fn vcopy_laneq_s32(a: int32x2_t, pub unsafe fn vcopy_laneq_u8(a: uint8x8_t, b: uint8x16_t) -> uint8x8_t { static_assert_imm3!(LANE1); static_assert_imm4!(LANE2); - let a: uint8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let a: uint8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1668,12 +1668,12 @@ pub unsafe fn vcopy_laneq_u8(a: uint8x8_t, b pub unsafe fn vcopy_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { static_assert_imm2!(LANE1); static_assert_imm3!(LANE2); - let a: uint16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let a: uint16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1686,10 +1686,10 @@ pub unsafe fn vcopy_laneq_u16(a: uint16x4_t, pub unsafe fn vcopy_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { static_assert_imm1!(LANE1); static_assert_imm2!(LANE2); - let a: uint32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + let a: uint32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1702,16 +1702,16 @@ pub unsafe fn vcopy_laneq_u32(a: uint32x2_t, pub unsafe fn vcopy_laneq_p8(a: poly8x8_t, b: poly8x16_t) -> poly8x8_t { static_assert_imm3!(LANE1); static_assert_imm4!(LANE2); - let a: poly8x16_t = simd_shuffle16(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15]); + let a: poly8x16_t = simd_shuffle16!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1724,12 +1724,12 @@ pub unsafe fn vcopy_laneq_p8(a: poly8x8_t, b pub unsafe fn vcopy_laneq_p16(a: poly16x4_t, b: poly16x8_t) -> poly16x4_t { static_assert_imm2!(LANE1); static_assert_imm3!(LANE2); - let a: poly16x8_t = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let a: poly16x8_t = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 8 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1742,10 +1742,10 @@ pub unsafe fn vcopy_laneq_p16(a: poly16x4_t, pub unsafe fn vcopy_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { static_assert_imm1!(LANE1); static_assert_imm2!(LANE2); - let a: float32x4_t = simd_shuffle4(a, a, [0, 1, 2, 3]); + let a: float32x4_t = simd_shuffle4!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 4 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1758,24 +1758,24 @@ pub unsafe fn vcopy_laneq_f32(a: float32x2_t pub unsafe fn vcopyq_lane_s8(a: int8x16_t, b: int8x8_t) -> int8x16_t { static_assert_imm4!(LANE1); static_assert_imm3!(LANE2); - let b: int8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1788,16 +1788,16 @@ pub unsafe fn vcopyq_lane_s8(a: int8x16_t, b pub unsafe fn vcopyq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_imm3!(LANE1); static_assert_imm2!(LANE2); - let b: int16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + let b: int16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 
3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1810,12 +1810,12 @@ pub unsafe fn vcopyq_lane_s16(a: int16x8_t, pub unsafe fn vcopyq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_imm2!(LANE1); static_assert_imm1!(LANE2); - let b: int32x4_t = simd_shuffle4(b, b, [0, 1, 2, 3]); + let b: int32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1828,24 +1828,24 @@ pub unsafe fn vcopyq_lane_s32(a: int32x4_t, pub unsafe fn vcopyq_lane_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { static_assert_imm4!(LANE1); static_assert_imm3!(LANE2); - let b: uint8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1858,16 +1858,16 @@ pub unsafe fn vcopyq_lane_u8(a: uint8x16_t, pub unsafe fn vcopyq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { static_assert_imm3!(LANE1); static_assert_imm2!(LANE2); - let b: uint16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + let b: uint16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1880,12 +1880,12 @@ pub unsafe fn vcopyq_lane_u16(a: uint16x8_t, pub unsafe fn vcopyq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { static_assert_imm2!(LANE1); static_assert_imm1!(LANE2); - let b: uint32x4_t = simd_shuffle4(b, 
b, [0, 1, 2, 3]); + let b: uint32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1898,24 +1898,24 @@ pub unsafe fn vcopyq_lane_u32(a: uint32x4_t, pub unsafe fn vcopyq_lane_p8(a: poly8x16_t, b: poly8x8_t) -> poly8x16_t { static_assert_imm4!(LANE1); static_assert_imm3!(LANE2); - let b: poly8x16_t = simd_shuffle16(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let b: poly8x16_t = simd_shuffle16!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { - 0 => simd_shuffle16(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle16(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle16(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle16(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + 0 => simd_shuffle16!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 2 => simd_shuffle16!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 3 => simd_shuffle16!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 4 => simd_shuffle16!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 5 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 6 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 7 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), + 8 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), + 9 => 
simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), + 10 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), + 11 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), + 12 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), + 13 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), + 14 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), + 15 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1928,16 +1928,16 @@ pub unsafe fn vcopyq_lane_p8(a: poly8x16_t, pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, b: poly16x4_t) -> poly16x8_t { static_assert_imm3!(LANE1); static_assert_imm2!(LANE2); - let b: poly16x8_t = simd_shuffle8(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + let b: poly16x8_t = simd_shuffle8!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { - 0 => simd_shuffle8(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle8(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle8(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle8(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + 0 => simd_shuffle8!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle8!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle8!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle8!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1950,10 +1950,10 @@ pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, pub unsafe fn vcopyq_lane_s64(a: int64x2_t, b: int64x1_t) -> int64x2_t { static_assert_imm1!(LANE1); static_assert!(LANE2 : i32 where LANE2 == 0); - let b: int64x2_t = simd_shuffle2(b, b, [0, 1]); + let b: int64x2_t = simd_shuffle2!(b, b, [0, 1]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1966,10 +1966,10 @@ pub unsafe fn vcopyq_lane_s64(a: int64x2_t, pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t { static_assert_imm1!(LANE1); static_assert!(LANE2 : i32 where LANE2 == 0); - let b: uint64x2_t = simd_shuffle2(b, b, [0, 1]); + let b: uint64x2_t = simd_shuffle2!(b, b, [0, 1]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1982,10 +1982,10 @@ pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, pub unsafe fn vcopyq_lane_p64(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t { 
static_assert_imm1!(LANE1); static_assert!(LANE2 : i32 where LANE2 == 0); - let b: poly64x2_t = simd_shuffle2(b, b, [0, 1]); + let b: poly64x2_t = simd_shuffle2!(b, b, [0, 1]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -1998,12 +1998,12 @@ pub unsafe fn vcopyq_lane_p64(a: poly64x2_t, pub unsafe fn vcopyq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { static_assert_imm2!(LANE1); static_assert_imm1!(LANE2); - let b: float32x4_t = simd_shuffle4(b, b, [0, 1, 2, 3]); + let b: float32x4_t = simd_shuffle4!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { - 0 => simd_shuffle4(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle4(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle4(a, b, [0, 1, 2, 4 + LANE2 as u32]), + 0 => simd_shuffle4!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle4!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle4!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -2016,10 +2016,10 @@ pub unsafe fn vcopyq_lane_f32(a: float32x4_t pub unsafe fn vcopyq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert_imm1!(LANE1); static_assert!(LANE2 : i32 where LANE2 == 0); - let b: float64x2_t = simd_shuffle2(b, b, [0, 1]); + let b: float64x2_t = simd_shuffle2!(b, b, [0, 1]); match LANE1 & 0b1 { - 0 => simd_shuffle2(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle2(a, b, [0, 2 + LANE2 as u32]), + 0 => simd_shuffle2!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle2!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } @@ -3085,7 +3085,7 @@ pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -3095,7 +3095,7 @@ pub unsafe fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { static_assert!(N : i32 where N == 0); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -3105,7 +3105,7 @@ pub unsafe fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -3115,7 +3115,7 @@ pub unsafe fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { static_assert!(N : i32 where N == 0); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -3406,8 +3406,8 @@ pub unsafe fn vdupd_laneq_f64(a: float64x2_t) -> f64 { pub unsafe fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ 
=> unreachable_unchecked(), } } @@ -3420,8 +3420,8 @@ pub unsafe fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_ pub unsafe fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ => unreachable_unchecked(), } } @@ -3447,8 +3447,8 @@ pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlal2))] pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); vmlal_s8(a, b, c) } @@ -3457,8 +3457,8 @@ pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8 #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlal2))] pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); vmlal_s16(a, b, c) } @@ -3467,8 +3467,8 @@ pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlal2))] pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle2(c, c, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle2!(c, c, [2, 3]); vmlal_s32(a, b, c) } @@ -3477,8 +3477,8 @@ pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlal2))] pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); vmlal_u8(a, b, c) } @@ -3487,8 +3487,8 @@ pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlal2))] pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); vmlal_u16(a, b, c) } @@ -3497,8 +3497,8 @@ pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uin #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlal2))] pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle2(c, c, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle2!(c, 
c, [2, 3]); vmlal_u32(a, b, c) } @@ -3541,7 +3541,7 @@ pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2 #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3551,7 +3551,7 @@ pub unsafe fn vmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3561,7 +3561,7 @@ pub unsafe fn vmlal_high_laneq_s16(a: int32x4_t, b: int16x8_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3571,7 +3571,7 @@ pub unsafe fn vmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3581,7 +3581,7 @@ pub unsafe fn vmlal_high_laneq_s32(a: int64x2_t, b: int32x4_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_lane_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3591,7 +3591,7 @@ pub unsafe fn vmlal_high_lane_u16(a: uint32x4_t, b: uint16x8_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_laneq_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3601,7 +3601,7 @@ pub unsafe fn vmlal_high_laneq_u16(a: uint32x4_t, b: uint16x8_t #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_lane_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { static_assert_imm1!(LANE); - vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, 
LANE as u32])) + vmlal_high_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-add long @@ -3611,7 +3611,7 @@ pub unsafe fn vmlal_high_lane_u32(a: uint64x2_t, b: uint32x4_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_high_laneq_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_high_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply-subtract from accumulator @@ -3635,8 +3635,8 @@ pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlsl2))] pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); vmlsl_s8(a, b, c) } @@ -3645,8 +3645,8 @@ pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8 #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlsl2))] pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); vmlsl_s16(a, b, c) } @@ -3655,8 +3655,8 @@ pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smlsl2))] pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle2(c, c, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle2!(c, c, [2, 3]); vmlsl_s32(a, b, c) } @@ -3665,8 +3665,8 @@ pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlsl2))] pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle8!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); vmlsl_u8(a, b, c) } @@ -3675,8 +3675,8 @@ pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlsl2))] pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle4!(c, c, [4, 5, 6, 7]); vmlsl_u16(a, b, c) } @@ -3685,8 +3685,8 @@ pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uin #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umlsl2))] pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: 
uint32x4_t) -> uint64x2_t { - let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle2(c, c, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle2!(c, c, [2, 3]); vmlsl_u32(a, b, c) } @@ -3729,7 +3729,7 @@ pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2 #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3739,7 +3739,7 @@ pub unsafe fn vmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_laneq_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3749,7 +3749,7 @@ pub unsafe fn vmlsl_high_laneq_s16(a: int32x4_t, b: int16x8_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3759,7 +3759,7 @@ pub unsafe fn vmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_laneq_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3769,7 +3769,7 @@ pub unsafe fn vmlsl_high_laneq_s32(a: int64x2_t, b: int32x4_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_lane_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3779,7 +3779,7 @@ pub unsafe fn vmlsl_high_lane_u16(a: uint32x4_t, b: uint16x8_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_laneq_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3789,7 +3789,7 @@ pub unsafe fn vmlsl_high_laneq_u16(a: uint32x4_t, 
b: uint16x8_t #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_lane_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { static_assert_imm1!(LANE); - vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply-subtract long @@ -3799,7 +3799,7 @@ pub unsafe fn vmlsl_high_lane_u32(a: uint64x2_t, b: uint32x4_t, #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_high_laneq_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_high_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Extract narrow @@ -3808,7 +3808,7 @@ pub unsafe fn vmlsl_high_laneq_u32(a: uint64x2_t, b: uint32x4_t #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { let c: int8x8_t = simd_cast(b); - simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Extract narrow @@ -3817,7 +3817,7 @@ pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { let c: int16x4_t = simd_cast(b); - simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Extract narrow @@ -3826,7 +3826,7 @@ pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { let c: int32x2_t = simd_cast(b); - simd_shuffle4(a, c, [0, 1, 2, 3]) + simd_shuffle4!(a, c, [0, 1, 2, 3]) } /// Extract narrow @@ -3835,7 +3835,7 @@ pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { let c: uint8x8_t = simd_cast(b); - simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Extract narrow @@ -3844,7 +3844,7 @@ pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { let c: uint16x4_t = simd_cast(b); - simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Extract narrow @@ -3853,7 +3853,7 @@ pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { #[cfg_attr(test, assert_instr(xtn2))] pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { let c: uint32x2_t = simd_cast(b); - simd_shuffle4(a, c, [0, 1, 2, 3]) + simd_shuffle4!(a, c, [0, 1, 2, 3]) } /// Negate @@ -4529,7 +4529,7 @@ pub unsafe fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE : i32 where LANE == 0); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -4539,7 +4539,7 @@ pub unsafe fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> 
#[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -4591,8 +4591,8 @@ pub unsafe fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smull2))] pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - let a: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); vmull_s8(a, b) } @@ -4601,8 +4601,8 @@ pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smull2))] pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); vmull_s16(a, b) } @@ -4611,8 +4611,8 @@ pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smull2))] pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); vmull_s32(a, b) } @@ -4621,8 +4621,8 @@ pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umull2))] pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - let a: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); vmull_u8(a, b) } @@ -4631,8 +4631,8 @@ pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umull2))] pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - let a: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); vmull_u16(a, b) } @@ -4641,8 +4641,8 @@ pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umull2))] pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - let a: uint32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); vmull_u32(a, b) } @@ -4651,8 +4651,8 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(pmull))] pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t { - 
let a: poly8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: poly8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: poly8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: poly8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); vmull_p8(a, b) } @@ -4695,7 +4695,7 @@ pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_s16(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4705,7 +4705,7 @@ pub unsafe fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_s16(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4715,7 +4715,7 @@ pub unsafe fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_s32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4725,7 +4725,7 @@ pub unsafe fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_s32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4735,7 +4735,7 @@ pub unsafe fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_u16(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4745,7 +4745,7 @@ pub unsafe fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_u16(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4755,7 +4755,7 @@ pub unsafe fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { 
static_assert_imm1!(LANE); - vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_u32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply long @@ -4765,7 +4765,7 @@ pub unsafe fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_high_u32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4847,7 +4847,7 @@ pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { static_assert_imm1!(LANE); - vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmulx_f32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4857,7 +4857,7 @@ pub unsafe fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { static_assert_imm2!(LANE); - vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmulx_f32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4867,7 +4867,7 @@ pub unsafe fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { static_assert_imm1!(LANE); - vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmulxq_f32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4877,7 +4877,7 @@ pub unsafe fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_imm2!(LANE); - vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmulxq_f32(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4887,7 +4887,7 @@ pub unsafe fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) #[rustc_legacy_const_generics(2)] pub unsafe fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE : i32 where LANE == 0); - vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmulxq_f64(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -4897,7 +4897,7 @@ pub unsafe fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_imm1!(LANE); - vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmulxq_f64(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply extended @@ -5498,8 +5498,8 @@ pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqdmull2))] pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let a: int16x4_t = 
simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); vqdmull_s16(a, b) } @@ -5508,8 +5508,8 @@ pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqdmull2))] pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); vqdmull_s32(a, b) } @@ -5518,7 +5518,7 @@ pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqdmull2))] pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); let b: int16x4_t = vdup_n_s16(b); vqdmull_s16(a, b) } @@ -5528,7 +5528,7 @@ pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sqdmull2))] pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); let b: int32x2_t = vdup_n_s32(b); vqdmull_s32(a, b) } @@ -5540,7 +5540,7 @@ pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { static_assert_imm3!(N); - let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]); + let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); vqdmull_s16(a, b) } @@ -5551,7 +5551,7 @@ pub unsafe fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { static_assert_imm2!(N); - let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]); + let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); vqdmull_s32(a, b) } @@ -5606,8 +5606,8 @@ pub unsafe fn vqdmulls_laneq_s32(a: i32, b: int32x4_t) -> i64 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_imm2!(N); - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]); + let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); vqdmull_s16(a, b) } @@ -5618,8 +5618,8 @@ pub unsafe fn vqdmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_imm1!(N); - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); vqdmull_s32(a, b) } @@ -5630,8 +5630,8 @@ pub unsafe fn vqdmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_imm3!(N); - let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); - let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]); + let a: 
int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); vqdmull_s16(a, b) } @@ -5642,8 +5642,8 @@ pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_imm2!(N); - let a: int32x2_t = simd_shuffle2(a, a, [2, 3]); - let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]); + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); vqdmull_s32(a, b) } @@ -6183,7 +6183,7 @@ pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Signed saturating rounded shift right narrow @@ -6193,7 +6193,7 @@ pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Signed saturating rounded shift right narrow @@ -6203,7 +6203,7 @@ pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> in #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3]) } /// Unsigned saturating rounded shift right narrow @@ -6246,7 +6246,7 @@ pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Unsigned saturating rounded shift right narrow @@ -6256,7 +6256,7 @@ pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Unsigned saturating rounded shift right narrow @@ -6266,7 +6266,7 @@ pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3]) } /// Signed saturating rounded shift right unsigned narrow @@ -6309,7 +6309,7 @@ pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Signed saturating rounded shift right unsigned narrow @@ -6319,7 +6319,7 @@ pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Signed saturating rounded shift right unsigned narrow @@ -6329,7 +6329,7 @@ pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3]) } /// Signed saturating shift left @@ -6521,7 +6521,7 @@ pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Signed saturating shift right narrow @@ -6531,7 +6531,7 @@ pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8 #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Signed saturating shift right narrow @@ -6541,7 +6541,7 @@ pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3]) } /// Unsigned saturating shift right narrow @@ -6581,7 +6581,7 @@ pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Unsigned saturating shift right narrow @@ -6591,7 +6591,7 @@ pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Unsigned saturating shift right narrow @@ -6601,7 +6601,7 @@ pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3]) } /// 
Signed saturating shift right unsigned narrow @@ -6641,7 +6641,7 @@ pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 { #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Signed saturating shift right unsigned narrow @@ -6651,7 +6651,7 @@ pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Signed saturating shift right unsigned narrow @@ -6661,7 +6661,7 @@ pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3]) } /// Calculates the square root of each lane. @@ -7527,7 +7527,7 @@ pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 { #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Rounding shift right narrow @@ -7537,7 +7537,7 @@ pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8 #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Rounding shift right narrow @@ -7547,7 +7547,7 @@ pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3]) } /// Rounding shift right narrow @@ -7557,7 +7557,7 @@ pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Rounding shift right narrow @@ -7567,7 +7567,7 @@ pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Rounding shift right narrow @@ -7577,7 +7577,7 @@ pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, 
b: uint32x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3]) } /// Signed rounding shift right and accumulate. @@ -7625,7 +7625,7 @@ pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t { static_assert!(N : i32 where N >= 0 && N <= 8); - let b: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); vshll_n_s8::<N>(b) } @@ -7636,7 +7636,7 @@ pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t { static_assert!(N : i32 where N >= 0 && N <= 16); - let b: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); vshll_n_s16::<N>(b) } @@ -7647,7 +7647,7 @@ pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t { static_assert!(N : i32 where N >= 0 && N <= 32); - let b: int32x2_t = simd_shuffle2(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(a, a, [2, 3]); vshll_n_s32::<N>(b) } @@ -7658,7 +7658,7 @@ pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t { static_assert!(N : i32 where N >= 0 && N <= 8); - let b: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); vshll_n_u8::<N>(b) } @@ -7669,7 +7669,7 @@ pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t { static_assert!(N : i32 where N >= 0 && N <= 16); - let b: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); vshll_n_u16::<N>(b) } @@ -7680,7 +7680,7 @@ pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t { static_assert!(N : i32 where N >= 0 && N <= 32); - let b: uint32x2_t = simd_shuffle2(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); vshll_n_u32::<N>(b) } @@ -7691,7 +7691,7 @@ pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Shift right narrow @@ -7701,7 +7701,7 @@ pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Shift right narrow @@ -7711,7 +7711,7 @@ pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int1 #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> 
int32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3]) } /// Shift right narrow @@ -7721,7 +7721,7 @@ pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int3 #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N : i32 where N >= 1 && N <= 8); - simd_shuffle16(a, vshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, vshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Shift right narrow @@ -7731,7 +7731,7 @@ pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uin #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N : i32 where N >= 1 && N <= 16); - simd_shuffle8(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Shift right narrow @@ -7741,7 +7741,7 @@ pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shuffle4(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) + simd_shuffle4!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) } /// Transpose vectors @@ -7749,7 +7749,7 @@ pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> ui #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// Transpose vectors @@ -7757,7 +7757,7 @@ pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } /// Transpose vectors @@ -7765,7 +7765,7 @@ pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7773,7 +7773,7 @@ pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// Transpose vectors @@ -7781,7 +7781,7 @@ pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7789,7 +7789,7 @@ pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// 
Transpose vectors @@ -7797,7 +7797,7 @@ pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } /// Transpose vectors @@ -7805,7 +7805,7 @@ pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7813,7 +7813,7 @@ pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// Transpose vectors @@ -7821,7 +7821,7 @@ pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7829,7 +7829,7 @@ pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// Transpose vectors @@ -7837,7 +7837,7 @@ pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) } /// Transpose vectors @@ -7845,7 +7845,7 @@ pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7853,7 +7853,7 @@ pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) + simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } /// Transpose vectors @@ -7861,7 +7861,7 @@ pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7869,7 +7869,7 @@ pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 
2]) } /// Transpose vectors @@ -7877,7 +7877,7 @@ pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7885,7 +7885,7 @@ pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7893,7 +7893,7 @@ pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7901,7 +7901,7 @@ pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn1))] pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4(a, b, [0, 4, 2, 6]) + simd_shuffle4!(a, b, [0, 4, 2, 6]) } /// Transpose vectors @@ -7909,7 +7909,7 @@ pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7917,7 +7917,7 @@ pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Transpose vectors @@ -7925,7 +7925,7 @@ pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -7933,7 +7933,7 @@ pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) } /// Transpose vectors @@ -7941,7 +7941,7 @@ pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -7949,7 +7949,7 @@ pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -7957,7 +7957,7 @@ pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable 
= "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -7965,7 +7965,7 @@ pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -7973,7 +7973,7 @@ pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) } /// Transpose vectors @@ -7981,7 +7981,7 @@ pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -7989,7 +7989,7 @@ pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -7997,7 +7997,7 @@ pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -8005,7 +8005,7 @@ pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -8013,7 +8013,7 @@ pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) + simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]) } /// Transpose vectors @@ -8021,7 +8021,7 @@ pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -8029,7 +8029,7 @@ pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) + simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) } /// Transpose vectors @@ -8037,7 +8037,7 @@ pub unsafe fn vtrn2q_p16(a: 
poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8045,7 +8045,7 @@ pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8053,7 +8053,7 @@ pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8061,7 +8061,7 @@ pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8069,7 +8069,7 @@ pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8077,7 +8077,7 @@ pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(trn2))] pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4(a, b, [1, 5, 3, 7]) + simd_shuffle4!(a, b, [1, 5, 3, 7]) } /// Transpose vectors @@ -8085,7 +8085,7 @@ pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Transpose vectors @@ -8093,7 +8093,7 @@ pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8101,7 +8101,7 @@ pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8109,7 +8109,7 @@ pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) } /// Zip vectors @@ -8117,7 +8117,7 @@ pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + 
simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8125,7 +8125,7 @@ pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8133,7 +8133,7 @@ pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8141,7 +8141,7 @@ pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8149,7 +8149,7 @@ pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8157,7 +8157,7 @@ pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8165,7 +8165,7 @@ pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) } /// Zip vectors @@ -8173,7 +8173,7 @@ pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8181,7 +8181,7 @@ pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8189,7 +8189,7 @@ pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8197,7 +8197,7 @@ pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8205,7 +8205,7 @@ pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] 
#[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8213,7 +8213,7 @@ pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8221,7 +8221,7 @@ pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) + simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) } /// Zip vectors @@ -8229,7 +8229,7 @@ pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8237,7 +8237,7 @@ pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) + simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } /// Zip vectors @@ -8245,7 +8245,7 @@ pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8253,7 +8253,7 @@ pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8261,7 +8261,7 @@ pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4(a, b, [0, 4, 1, 5]) + simd_shuffle4!(a, b, [0, 4, 1, 5]) } /// Zip vectors @@ -8269,7 +8269,7 @@ pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Zip vectors @@ -8277,7 +8277,7 @@ pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8285,7 +8285,7 @@ pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 
12, 28, 13, 29, 14, 30, 15, 31]) + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) } /// Zip vectors @@ -8293,7 +8293,7 @@ pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8301,7 +8301,7 @@ pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8309,7 +8309,7 @@ pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8317,7 +8317,7 @@ pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8325,7 +8325,7 @@ pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8333,7 +8333,7 @@ pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8341,7 +8341,7 @@ pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) } /// Zip vectors @@ -8349,7 +8349,7 @@ pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8357,7 +8357,7 @@ pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8365,7 +8365,7 @@ pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8373,7 +8373,7 @@ pub 
unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8381,7 +8381,7 @@ pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8389,7 +8389,7 @@ pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8397,7 +8397,7 @@ pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) + simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) } /// Zip vectors @@ -8405,7 +8405,7 @@ pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8413,7 +8413,7 @@ pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) + simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } /// Zip vectors @@ -8421,7 +8421,7 @@ pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8429,7 +8429,7 @@ pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Zip vectors @@ -8437,7 +8437,7 @@ pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4(a, b, [2, 6, 3, 7]) + simd_shuffle4!(a, b, [2, 6, 3, 7]) } /// Zip vectors @@ -8445,7 +8445,7 @@ pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8453,7 +8453,7 @@ pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn 
vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8461,7 +8461,7 @@ pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) } /// Unzip vectors @@ -8469,7 +8469,7 @@ pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8477,7 +8477,7 @@ pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8485,7 +8485,7 @@ pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8493,7 +8493,7 @@ pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8501,7 +8501,7 @@ pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) } /// Unzip vectors @@ -8509,7 +8509,7 @@ pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8517,7 +8517,7 @@ pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8525,7 +8525,7 @@ pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8533,7 +8533,7 @@ pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn 
vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8541,7 +8541,7 @@ pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) + simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) } /// Unzip vectors @@ -8549,7 +8549,7 @@ pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8557,7 +8557,7 @@ pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) + simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) } /// Unzip vectors @@ -8565,7 +8565,7 @@ pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8573,7 +8573,7 @@ pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8581,7 +8581,7 @@ pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8589,7 +8589,7 @@ pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8597,7 +8597,7 @@ pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8605,7 +8605,7 @@ pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp1))] pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle4(a, b, [0, 2, 4, 6]) + simd_shuffle4!(a, b, [0, 2, 4, 6]) } /// Unzip vectors @@ -8613,7 +8613,7 @@ pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8621,7 +8621,7 @@ pub unsafe fn 
vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip1))] pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [0, 2]) + simd_shuffle2!(a, b, [0, 2]) } /// Unzip vectors @@ -8629,7 +8629,7 @@ pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8637,7 +8637,7 @@ pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle16(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) } /// Unzip vectors @@ -8645,7 +8645,7 @@ pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8653,7 +8653,7 @@ pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8661,7 +8661,7 @@ pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8669,7 +8669,7 @@ pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8677,7 +8677,7 @@ pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle16(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) } /// Unzip vectors @@ -8685,7 +8685,7 @@ pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8693,7 +8693,7 @@ pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8701,7 +8701,7 @@ pub unsafe fn 
vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8709,7 +8709,7 @@ pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8717,7 +8717,7 @@ pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle16(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) + simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) } /// Unzip vectors @@ -8725,7 +8725,7 @@ pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8733,7 +8733,7 @@ pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle8(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) + simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } /// Unzip vectors @@ -8741,7 +8741,7 @@ pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8749,7 +8749,7 @@ pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8757,7 +8757,7 @@ pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8765,7 +8765,7 @@ pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8773,7 +8773,7 @@ pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8781,7 +8781,7 @@ pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uzp2))] pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) 
-> float32x4_t { - simd_shuffle4(a, b, [1, 3, 5, 7]) + simd_shuffle4!(a, b, [1, 3, 5, 7]) } /// Unzip vectors @@ -8789,7 +8789,7 @@ pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unzip vectors @@ -8797,7 +8797,7 @@ pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(zip2))] pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle2(a, b, [1, 3]) + simd_shuffle2!(a, b, [1, 3]) } /// Unsigned Absolute difference and Accumulate Long diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 71ad92e84f..948ee479a6 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -2449,7 +2449,7 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2461,7 +2461,7 @@ pub unsafe fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { static_assert_imm4!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2473,7 +2473,7 @@ pub unsafe fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2485,7 +2485,7 @@ pub unsafe fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2497,7 +2497,7 @@ pub unsafe fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2509,7 +2509,7 @@ pub unsafe fn vdup_lane_s32(a: int32x2_t) -> int32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N 
as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2521,7 +2521,7 @@ pub unsafe fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { static_assert_imm4!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2533,7 +2533,7 @@ pub unsafe fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { static_assert_imm3!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2545,7 +2545,7 @@ pub unsafe fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { static_assert_imm2!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2557,7 +2557,7 @@ pub unsafe fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { static_assert_imm3!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2569,7 +2569,7 @@ pub unsafe fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { static_assert_imm2!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2581,7 +2581,7 @@ pub unsafe fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { static_assert_imm1!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2593,7 +2593,7 @@ pub unsafe fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2605,7 +2605,7 @@ pub unsafe fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { static_assert_imm4!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N 
as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2617,7 +2617,7 @@ pub unsafe fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2629,7 +2629,7 @@ pub unsafe fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2641,7 +2641,7 @@ pub unsafe fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2653,7 +2653,7 @@ pub unsafe fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2665,7 +2665,7 @@ pub unsafe fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { static_assert_imm4!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2677,7 +2677,7 @@ pub unsafe fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { static_assert_imm3!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2689,7 +2689,7 @@ pub unsafe fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { static_assert_imm2!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2701,7 +2701,7 @@ pub unsafe fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { static_assert_imm3!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2713,7 +2713,7 @@ pub unsafe fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { 
#[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { static_assert_imm2!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2725,7 +2725,7 @@ pub unsafe fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { static_assert_imm1!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2737,7 +2737,7 @@ pub unsafe fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2749,7 +2749,7 @@ pub unsafe fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { static_assert_imm4!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2761,7 +2761,7 @@ pub unsafe fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2773,7 +2773,7 @@ pub unsafe fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { static_assert_imm3!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2785,7 +2785,7 @@ pub unsafe fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { static_assert_imm4!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2797,7 +2797,7 @@ pub unsafe fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { static_assert_imm3!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2809,7 +2809,7 @@ pub unsafe fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { 
#[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { static_assert_imm3!(N); - simd_shuffle16(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2821,7 +2821,7 @@ pub unsafe fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { static_assert_imm2!(N); - simd_shuffle8(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2833,7 +2833,7 @@ pub unsafe fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2845,7 +2845,7 @@ pub unsafe fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { static_assert!(N : i32 where N == 0); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2857,7 +2857,7 @@ pub unsafe fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2869,7 +2869,7 @@ pub unsafe fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { static_assert!(N : i32 where N == 0); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2881,7 +2881,7 @@ pub unsafe fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { static_assert_imm1!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2893,7 +2893,7 @@ pub unsafe fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { static_assert_imm2!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2905,7 +2905,7 @@ pub unsafe fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { static_assert_imm2!(N); - simd_shuffle2(a, a, [N as u32, N as u32]) + simd_shuffle2!(a, a, [N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2917,7 +2917,7 @@ pub unsafe fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { #[rustc_legacy_const_generics(1)] pub unsafe fn vdupq_lane_f32(a: 
float32x2_t) -> float32x4_t { static_assert_imm1!(N); - simd_shuffle4(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32]) } /// Set all vector lanes to the same value @@ -2978,14 +2978,14 @@ pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3000,22 +3000,22 @@ pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert_imm4!(N); match N & 0b1111 { - 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle16(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), - 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + 0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 
=> simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } } @@ -3030,10 +3030,10 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3048,14 +3048,14 @@ pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3070,8 +3070,8 @@ pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ => unreachable_unchecked(), } } @@ -3086,10 +3086,10 @@ pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 
4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3104,14 +3104,14 @@ pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { pub unsafe fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3126,22 +3126,22 @@ pub unsafe fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert_imm4!(N); match N & 0b1111 { - 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle16(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), - 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + 0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16!(a, b, [7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } } @@ -3156,10 +3156,10 @@ pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t pub unsafe fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3174,14 +3174,14 @@ pub unsafe fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t pub unsafe fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3196,8 +3196,8 @@ pub unsafe fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_ pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ => unreachable_unchecked(), } } @@ -3212,10 +3212,10 @@ pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t pub unsafe fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3230,14 +3230,14 @@ pub 
unsafe fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_ pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3252,22 +3252,22 @@ pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { static_assert_imm4!(N); match N & 0b1111 { - 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle16(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle16(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle16(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle16(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle16(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle16(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle16(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle16(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle16(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle16(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle16(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle16(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle16(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle16(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), - 15 => simd_shuffle16(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + 0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } } @@ -3282,10 +3282,10 @@ pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t pub unsafe fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3300,14 +3300,14 @@ pub unsafe fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { static_assert_imm3!(N); match N & 0b111 { - 0 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } @@ -3322,8 +3322,8 @@ pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_ pub unsafe fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ => unreachable_unchecked(), } } @@ -3338,8 +3338,8 @@ pub unsafe fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { pub unsafe fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), _ => unreachable_unchecked(), } } @@ -3354,8 +3354,8 @@ pub unsafe fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_ pub unsafe fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { static_assert_imm1!(N); match N & 0b1 { - 0 => simd_shuffle2(a, b, [0, 1]), - 1 => simd_shuffle2(a, b, [1, 2]), + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, 
[1, 2]), _ => unreachable_unchecked(), } } @@ -3370,10 +3370,10 @@ pub unsafe fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x pub unsafe fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_imm2!(N); match N & 0b11 { - 0 => simd_shuffle4(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle4(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle4(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle4(a, b, [3, 4, 5, 6]), + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } @@ -3627,7 +3627,7 @@ pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_imm2!(LANE); - vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmla_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3639,7 +3639,7 @@ pub unsafe fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int1 #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_imm3!(LANE); - vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmla_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3651,7 +3651,7 @@ pub unsafe fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_imm2!(LANE); - vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3663,7 +3663,7 @@ pub unsafe fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_imm3!(LANE); - vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3675,7 +3675,7 @@ pub unsafe fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_imm1!(LANE); - vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3687,7 +3687,7 @@ pub unsafe fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int3 #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_imm2!(LANE); - vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3699,7 +3699,7 @@ pub 
unsafe fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_imm1!(LANE); - vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3711,7 +3711,7 @@ pub unsafe fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3723,7 +3723,7 @@ pub unsafe fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { static_assert_imm2!(LANE); - vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmla_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3735,7 +3735,7 @@ pub unsafe fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: ui #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { static_assert_imm3!(LANE); - vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmla_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3747,7 +3747,7 @@ pub unsafe fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { static_assert_imm2!(LANE); - vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3759,7 +3759,7 @@ pub unsafe fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { static_assert_imm3!(LANE); - vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3771,7 +3771,7 @@ pub unsafe fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { static_assert_imm1!(LANE); - vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3783,7 +3783,7 @@ pub unsafe fn vmla_lane_u32(a: uint32x2_t, b: uint32x2_t, c: ui #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_laneq_u32(a: 
uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { static_assert_imm2!(LANE); - vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3795,7 +3795,7 @@ pub unsafe fn vmla_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { static_assert_imm1!(LANE); - vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3807,7 +3807,7 @@ pub unsafe fn vmlaq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3819,7 +3819,7 @@ pub unsafe fn vmlaq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { static_assert_imm1!(LANE); - vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3831,7 +3831,7 @@ pub unsafe fn vmla_lane_f32(a: float32x2_t, b: float32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmla_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { static_assert_imm2!(LANE); - vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmla_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3843,7 +3843,7 @@ pub unsafe fn vmla_laneq_f32(a: float32x2_t, b: float32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { static_assert_imm1!(LANE); - vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply accumulate with scalar @@ -3855,7 +3855,7 @@ pub unsafe fn vmlaq_lane_f32(a: float32x4_t, b: float32x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlaq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { static_assert_imm2!(LANE); - vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlaq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Signed multiply-add long @@ -3967,7 +3967,7 @@ pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -3979,7 +3979,7 @@ pub unsafe fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int 
#[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -3991,7 +3991,7 @@ pub unsafe fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlal_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -4003,7 +4003,7 @@ pub unsafe fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlal_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -4015,7 +4015,7 @@ pub unsafe fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -4027,7 +4027,7 @@ pub unsafe fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlal_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -4039,7 +4039,7 @@ pub unsafe fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { static_assert_imm1!(LANE); - vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlal_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector widening multiply accumulate with scalar @@ -4051,7 +4051,7 @@ pub unsafe fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlal_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlal_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Multiply-subtract from accumulator @@ -4303,7 +4303,7 @@ pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_imm2!(LANE); - vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmls_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4315,7 +4315,7 @@ pub unsafe fn 
vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int1 #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_imm3!(LANE); - vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmls_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4327,7 +4327,7 @@ pub unsafe fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_imm2!(LANE); - vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4339,7 +4339,7 @@ pub unsafe fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_imm3!(LANE); - vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4351,7 +4351,7 @@ pub unsafe fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_imm1!(LANE); - vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4363,7 +4363,7 @@ pub unsafe fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int3 #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_imm2!(LANE); - vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4375,7 +4375,7 @@ pub unsafe fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_imm1!(LANE); - vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4387,7 +4387,7 @@ pub unsafe fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4399,7 +4399,7 @@ pub unsafe fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { static_assert_imm2!(LANE); - vmls_u16(a, 
b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmls_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4411,7 +4411,7 @@ pub unsafe fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: ui #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { static_assert_imm3!(LANE); - vmls_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmls_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4423,7 +4423,7 @@ pub unsafe fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { static_assert_imm2!(LANE); - vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4435,7 +4435,7 @@ pub unsafe fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { static_assert_imm3!(LANE); - vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4447,7 +4447,7 @@ pub unsafe fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { static_assert_imm1!(LANE); - vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4459,7 +4459,7 @@ pub unsafe fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: ui #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { static_assert_imm2!(LANE); - vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4471,7 +4471,7 @@ pub unsafe fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { static_assert_imm1!(LANE); - vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4483,7 +4483,7 @@ pub unsafe fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector 
multiply subtract with scalar @@ -4495,7 +4495,7 @@ pub unsafe fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { static_assert_imm1!(LANE); - vmls_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4507,7 +4507,7 @@ pub unsafe fn vmls_lane_f32(a: float32x2_t, b: float32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmls_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { static_assert_imm2!(LANE); - vmls_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmls_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4519,7 +4519,7 @@ pub unsafe fn vmls_laneq_f32(a: float32x2_t, b: float32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { static_assert_imm1!(LANE); - vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector multiply subtract with scalar @@ -4531,7 +4531,7 @@ pub unsafe fn vmlsq_lane_f32(a: float32x4_t, b: float32x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { static_assert_imm2!(LANE); - vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Signed multiply-subtract long @@ -4643,7 +4643,7 @@ pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4655,7 +4655,7 @@ pub unsafe fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4667,7 +4667,7 @@ pub unsafe fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlsl_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4679,7 +4679,7 @@ pub unsafe fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlsl_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// 
Vector widening multiply subtract with scalar @@ -4691,7 +4691,7 @@ pub unsafe fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: in #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4703,7 +4703,7 @@ pub unsafe fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmlsl_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4715,7 +4715,7 @@ pub unsafe fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { static_assert_imm1!(LANE); - vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlsl_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Vector widening multiply subtract with scalar @@ -4727,7 +4727,7 @@ pub unsafe fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: u #[rustc_legacy_const_generics(3)] pub unsafe fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) + vmlsl_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) } /// Negate @@ -6115,7 +6115,7 @@ pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6127,7 +6127,7 @@ pub unsafe fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int1 #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6139,7 +6139,7 @@ pub unsafe fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6151,7 +6151,7 @@ pub unsafe fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle8!(b, b, [LANE 
as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6163,7 +6163,7 @@ pub unsafe fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> in #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Multiply @@ -6175,7 +6175,7 @@ pub unsafe fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int3 #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Multiply @@ -6187,7 +6187,7 @@ pub unsafe fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6199,7 +6199,7 @@ pub unsafe fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6211,7 +6211,7 @@ pub unsafe fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> in #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6223,7 +6223,7 @@ pub unsafe fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6235,7 +6235,7 @@ pub unsafe fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6247,7 +6247,7 @@ pub unsafe fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert_imm3!(LANE); - simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6259,7 +6259,7 
@@ pub unsafe fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Multiply @@ -6271,7 +6271,7 @@ pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> ui #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Multiply @@ -6283,7 +6283,7 @@ pub unsafe fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Multiply @@ -6295,7 +6295,7 @@ pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -6307,7 +6307,7 @@ pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -6319,7 +6319,7 @@ pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -6331,7 +6331,7 @@ pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { static_assert_imm1!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Floating-point multiply @@ -6343,7 +6343,7 @@ pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_imm2!(LANE); - simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Signed multiply long @@ -6507,7 +6507,7 @@ pub unsafe fn vmulls_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as 
u32, LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6519,7 +6519,7 @@ pub unsafe fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { static_assert_imm3!(LANE); - vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6531,7 +6531,7 @@ pub unsafe fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> in #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { static_assert_imm1!(LANE); - vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6543,7 +6543,7 @@ pub unsafe fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { static_assert_imm2!(LANE); - vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6555,7 +6555,7 @@ pub unsafe fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> in #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { static_assert_imm2!(LANE); - vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6567,7 +6567,7 @@ pub unsafe fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { static_assert_imm3!(LANE); - vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6579,7 +6579,7 @@ pub unsafe fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { static_assert_imm1!(LANE); - vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Vector long multiply by scalar @@ -6591,7 +6591,7 @@ pub unsafe fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> u #[rustc_legacy_const_generics(2)] pub unsafe fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { static_assert_imm2!(LANE); - vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32])) + vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) } /// Floating-point fused Multiply-Add to accumulator(vector) @@ -6902,7 +6902,7 @@ pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { let d: int8x8_t = vsubhn_s16(b, c); - simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Subtract returning high narrow @@ -6913,7 +6913,7 @@ pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x1 
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { let d: int16x4_t = vsubhn_s32(b, c); - simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Subtract returning high narrow @@ -6924,7 +6924,7 @@ pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { let d: int32x2_t = vsubhn_s64(b, c); - simd_shuffle4(a, d, [0, 1, 2, 3]) + simd_shuffle4!(a, d, [0, 1, 2, 3]) } /// Subtract returning high narrow @@ -6935,7 +6935,7 @@ pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { let d: uint8x8_t = vsubhn_u16(b, c); - simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) + simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } /// Subtract returning high narrow @@ -6946,7 +6946,7 @@ pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uin #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { let d: uint16x4_t = vsubhn_u32(b, c); - simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) + simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Subtract returning high narrow @@ -6957,7 +6957,7 @@ pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> ui #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { let d: uint32x2_t = vsubhn_u64(b, c); - simd_shuffle4(a, d, [0, 1, 2, 3]) + simd_shuffle4!(a, d, [0, 1, 2, 3]) } /// Signed halving subtract @@ -7857,7 +7857,7 @@ pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { static_assert_imm2!(N); - let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]); + let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]); vqdmull_s16(a, b) } @@ -7870,7 +7870,7 @@ pub unsafe fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int3 #[rustc_legacy_const_generics(2)] pub unsafe fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { static_assert_imm1!(N); - let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]); + let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]); vqdmull_s32(a, b) } @@ -8223,7 +8223,7 @@ pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_imm2!(LANE); - let b: int16x4_t = simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulh_s16(a, b) } @@ -8236,7 +8236,7 @@ pub unsafe fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { static_assert_imm3!(LANE); - let b: int16x4_t = simd_shuffle4(b, b, 
[LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulh_s16(a, b) } @@ -8249,7 +8249,7 @@ pub unsafe fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_imm2!(LANE); - let b: int16x8_t = simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulhq_s16(a, b) } @@ -8262,7 +8262,7 @@ pub unsafe fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_imm3!(LANE); - let b: int16x8_t = simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulhq_s16(a, b) } @@ -8275,7 +8275,7 @@ pub unsafe fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) - #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_imm1!(LANE); - let b: int32x2_t = simd_shuffle2(b, b, [LANE as u32, LANE as u32]); + let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); vqrdmulh_s32(a, b) } @@ -8288,7 +8288,7 @@ pub unsafe fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { static_assert_imm2!(LANE); - let b: int32x2_t = simd_shuffle2(b, b, [LANE as u32, LANE as u32]); + let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]); vqrdmulh_s32(a, b) } @@ -8301,7 +8301,7 @@ pub unsafe fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_imm1!(LANE); - let b: int32x4_t = simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulhq_s32(a, b) } @@ -8314,7 +8314,7 @@ pub unsafe fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> #[rustc_legacy_const_generics(2)] pub unsafe fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert_imm2!(LANE); - let b: int32x4_t = simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); vqrdmulhq_s32(a, b) } diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index bdf72621eb..e156aefcd1 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -727,7 +727,7 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 0:1 @@ -744,7 
+744,7 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2} a = 1., 2., 3., 4. b = 0., 0.5, 0., 0. n = 0:1 @@ -759,8 +759,8 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len} -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2} +multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 0:1 @@ -777,8 +777,8 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = simd_shuffle-in_len-noext, a:in_t, a, a, {asc-0-in_len} -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in_len-LANE2} +multi_fn = simd_shuffle-in_len-!, a:in_t, a, a, {asc-0-in_len} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in_len-LANE2} a = 1., 2., 3., 4. b = 0., 0.5, 0., 0. n = 0:1 @@ -793,8 +793,8 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len} -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2} +multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 0:1 @@ -811,8 +811,8 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len} -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2} +multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1:0 @@ -827,8 +827,8 @@ lane-suffixes constn = LANE1:LANE2 multi_fn = static_assert_imm-in0_exp_len-LANE1 multi_fn = static_assert_imm-in_exp_len-LANE2 -multi_fn = simd_shuffle-in0_len-noext, b:in_t0, b, b, {asc-0-in0_len} -multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-noext, a, b, {ins-in0_len-in0_len-LANE2} +multi_fn = simd_shuffle-in0_len-!, b:in_t0, b, b, {asc-0-in0_len} +multi_fn = matchn-in0_exp_len-LANE1, simd_shuffle-out_len-!, a, b, {ins-in0_len-in0_len-LANE2} a = 1., 2., 3., 4. b = 0.5, 0., 0., 0. 
n = 1:0 @@ -1162,7 +1162,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32} a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16 n = HFLEN validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 @@ -1188,7 +1188,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32} a = 1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16 n = HFLEN validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 @@ -1202,7 +1202,7 @@ name = vdup lane-suffixes constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, a, a, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, a, a, {dup-out_len-N as u32} a = 1., 1., 1., 4. n = HFLEN validate 1., 1., 1., 1. @@ -1303,7 +1303,7 @@ generate float32x2_t:f32, float32x4_t:f32, float64x1_t:f64, float64x2_t:f64 name = vext constn = N multi_fn = static_assert_imm-out_exp_len-N -multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len} a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15 b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11 n = HFLEN @@ -1317,7 +1317,7 @@ generate int*_t, uint*_t, poly8x8_t, poly8x16_t, poly16x4_t, poly16x8_t name = vext constn = N multi_fn = static_assert_imm-out_exp_len-N -multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len} a = 0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15 b = 9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11 n = HFLEN @@ -1333,7 +1333,7 @@ generate int64x2_t, uint64x2_t name = vext constn = N multi_fn = static_assert_imm-out_exp_len-N -multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-noext, a, b, {asc-n-out_len} +multi_fn = matchn-out_exp_len-N, simd_shuffle-out_len-!, a, b, {asc-n-out_len} a = 0., 2., 2., 3. b = 3., 4., 5., 6., n = HFLEN @@ -1403,7 +1403,7 @@ name = vmla in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1422,7 +1422,7 @@ name = vmla in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 0., 1., 2., 3. b = 2., 2., 2., 2. c = 0., 3., 0., 0. 
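The noext-to-! edits in these spec entries switch stdarch-gen from emitting plain simd_shuffleN(...) calls to the simd_shuffleN!(...) macro calls visible in the generated.rs hunks earlier in this patch. The macro definitions themselves are not shown in these hunks, so the following is only a rough, self-contained sketch of the idea under one assumption: that the macro exists to bind the index array to a named const instead of passing an ordinary array expression. The stand-in function and macro below are hypothetical.

    // Hypothetical stand-in for a SIMD shuffle intrinsic, so the sketch
    // compiles and runs on its own.
    fn do_shuffle4(x: [u32; 4], y: [u32; 4], idx: [u32; 4]) -> [u32; 4] {
        let lanes = [x[0], x[1], x[2], x[3], y[0], y[1], y[2], y[3]];
        [
            lanes[idx[0] as usize],
            lanes[idx[1] as usize],
            lanes[idx[2] as usize],
            lanes[idx[3] as usize],
        ]
    }

    // Sketch of a shuffle macro that forces its index argument into a
    // `const` item before forwarding it.
    macro_rules! shuffle4 {
        ($x:expr, $y:expr, $idx:expr) => {{
            const IDX: [u32; 4] = $idx;
            do_shuffle4($x, $y, IDX)
        }};
    }

    fn main() {
        let a = [10, 11, 12, 13];
        let b = [20, 21, 22, 23];
        // Same call shape as the generated code: shuffle4!(x, y, [indices]).
        assert_eq!(shuffle4!(a, b, [0, 1, 4, 5]), [10, 11, 20, 21]);
    }

The lane intrinsics in the hunks above additionally need the index array to mention the LANE const generic; handling that is a detail of the real macro that this sketch leaves out.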
@@ -1477,7 +1477,7 @@ name = vmlal_lane in2-suffix constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1495,8 +1495,8 @@ generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint /// Signed multiply-add long name = vmlal_high no-q -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right} multi_fn = vmlal-noqself-noext, a, b, c a = 8, 7, 6, 5, 4, 3, 2, 1 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 @@ -1510,8 +1510,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t: /// Unsigned multiply-add long name = vmlal_high no-q -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right} multi_fn = vmlal-noqself-noext, a, b, c a = 8, 7, 6, 5, 4, 3, 2, 1 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 @@ -1541,7 +1541,7 @@ name = vmlal_high_lane in2-suffix constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 8, 7, 6, 5, 4, 3, 2, 1 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1613,7 +1613,7 @@ name = vmls in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1632,7 +1632,7 @@ name = vmls in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 6., 7., 8., 9. b = 2., 2., 2., 2. c = 0., 3., 0., 0. 
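The dup-in_len-LANE substitution in the vmla/vmls lane entries above expands to the repeated-LANE index arrays seen in the vmls_lane_* and vmul_lane_* hunks of generated.rs: the shuffle broadcasts a single lane of c across the vector before the element-wise operation. Below is a hypothetical plain-array analogue of that shape, not the real intrinsic (which goes through simd_shuffle4! and vmls_s16); its test vectors mirror the first four lanes of the integer vmls spec entry above.

    // Hypothetical analogue of the vmls_lane_s16 pattern: broadcast lane
    // LANE of `c`, then compute a - b * c lane by lane.
    fn mls_lane<const LANE: usize>(a: [i16; 4], b: [i16; 4], c: [i16; 4]) -> [i16; 4] {
        // Counterpart of simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]).
        let splat = [c[LANE]; 4];
        // Counterpart of vmls_s16(a, b, splat).
        std::array::from_fn(|i| a[i] - b[i] * splat[i])
    }

    fn main() {
        // LANE = 1 selects the 3 in c, so every lane subtracts 2 * 3.
        assert_eq!(mls_lane::<1>([6, 7, 8, 9], [2, 2, 2, 2], [0, 3, 0, 0]), [0, 1, 2, 3]);
    }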
@@ -1687,7 +1687,7 @@ name = vmlsl_lane in2-suffix constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1705,8 +1705,8 @@ generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint /// Signed multiply-subtract long name = vmlsl_high no-q -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right} multi_fn = vmlsl-noqself-noext, a, b, c a = 14, 15, 16, 17, 18, 19, 20, 21 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 @@ -1720,8 +1720,8 @@ generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t: /// Unsigned multiply-subtract long name = vmlsl_high no-q -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, c:half, c, c, {fixed-half-right} multi_fn = vmlsl-noqself-noext, a, b, c a = 14, 15, 16, 17, 18, 19, 20, 21 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 @@ -1751,7 +1751,7 @@ name = vmlsl_high_lane in2-suffix constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}} +multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-!, c, c, {dup-in_len-LANE as u32}} a = 14, 15, 16, 17, 18, 19, 20, 21 b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7 c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1769,7 +1769,7 @@ generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint name = vmovn_high no-q multi_fn = simd_cast, c:in_t0, b -multi_fn = simd_shuffle-out_len-noext, a, c, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, c, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 4, 5 b = 2, 3, 4, 5, 12, 13, 14, 15 validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15 @@ -2070,7 +2070,7 @@ name = vmul lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}} +multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 @@ -2102,7 +2102,7 @@ name = vmul lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}} +multi_fn = simd_mul, a, {simd_shuffle-out_len-!, b, b, {dup-out_len-LANE as u32}} a = 1., 2., 3., 4. b = 2., 0., 0., 0. 
n = 0 @@ -2155,8 +2155,8 @@ generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:i /// Signed multiply long name = vmull_high no-q -multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 @@ -2181,8 +2181,8 @@ generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint3 /// Unsigned multiply long name = vmull_high no-q -multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 @@ -2222,8 +2222,8 @@ link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t /// Polynomial multiply long name = vmull_high no-q -multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right} -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {fixed-half-right} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {fixed-half-right} multi_fn = vmull-noqself-noext, a, b a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3 @@ -2263,7 +2263,7 @@ generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t name = vmull_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}} +multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 @@ -2294,7 +2294,7 @@ generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t name = vmull_high_lane constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}} +multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16 b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1 @@ -2336,7 +2336,7 @@ name = vmulx lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}} +multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-!, b, b, {dup-in0_len-LANE as u32}} a = 1., 2., 3., 4. b = 2., 0., 0., 0. 
n = 0 @@ -2573,7 +2573,7 @@ generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t name = vsubhn_high no-q multi_fn = vsubhn-noqself-noext, d:in_t0, b, c -multi_fn = simd_shuffle-out_len-noext, a, d, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, d, {asc-0-out_len} a = MAX, 0, MAX, 0, MAX, 0, MAX, 0 b = MAX, 1, MAX, 1, MAX, 1, MAX, 1 c = 1, 0, 1, 0, 1, 0, 1, 0 @@ -3011,8 +3011,8 @@ generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t /// Signed saturating doubling multiply long name = vqdmull_high no-q -multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-halflen-halflen} -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {asc-halflen-halflen} +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-halflen-halflen} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {asc-halflen-halflen} multi_fn = vqdmull-noqself-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 1, 2, 5, 6, 5, 6, 7, 8 @@ -3024,7 +3024,7 @@ generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t /// Signed saturating doubling multiply long name = vqdmull_high_n no-q -multi_fn = simd_shuffle-out_len-noext, a:in_ntt, a, a, {asc-out_len-out_len} +multi_fn = simd_shuffle-out_len-!, a:in_ntt, a, a, {asc-out_len-out_len} multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b multi_fn = vqdmull-in_ntt-noext, a, b a = 0, 2, 8, 10, 8, 10, 12, 14 @@ -3038,7 +3038,7 @@ generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t name = vqdmull_lane constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, b:in_t0, b, b, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, b:in_t0, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-noqself-noext, a, b a = 1, 2, 3, 4 b = 0, 2, 2, 0, 2, 0, 0, 0 @@ -3083,8 +3083,8 @@ generate i32:int32x2_t:i64, i32:int32x4_t:i64 name = vqdmull_high_lane constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, a:in_t, a, a, {asc-out_len-out_len} -multi_fn = simd_shuffle-out_len-noext, b:in_t, b, b, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, a:in_t, a, a, {asc-out_len-out_len} +multi_fn = simd_shuffle-out_len-!, b:in_t, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-self-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 0, 2, 2, 0, 2, 0, 0, 0 @@ -3098,8 +3098,8 @@ generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t name = vqdmull_high_lane constn = N multi_fn = static_assert_imm-in_exp_len-N -multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-out_len-out_len} -multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {dup-out_len-N as u32} +multi_fn = simd_shuffle-out_len-!, a:half, a, a, {asc-out_len-out_len} +multi_fn = simd_shuffle-out_len-!, b:half, b, b, {dup-out_len-N as u32} multi_fn = vqdmull-noqself-noext, a, b a = 0, 1, 4, 5, 4, 5, 6, 7 b = 0, 2, 2, 0, 2, 0, 0, 0 @@ -3390,7 +3390,7 @@ name = vqrdmulh lane-suffixes constn = LANE multi_fn = static_assert_imm-in_exp_len-LANE -multi_fn = simd_shuffle-out_len-noext, b:out_t, b, b, {dup-out_len-LANE as u32} +multi_fn = simd_shuffle-out_len-!, b:out_t, b, b, {dup-out_len-LANE as u32} multi_fn = vqrdmulh-out-noext, a, b a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX b = 0, 2, 0, 0, 0, 0, 0, 0, @@ -3616,7 +3616,7 @@ name = vqrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 
2 @@ -3662,7 +3662,7 @@ name = vqrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 2 @@ -3708,7 +3708,7 @@ name = vqrshrun_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqrshrun_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqrshrun_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 2, 3, 2, 3, 6, 7 b = 8, 12, 24, 28, 48, 52, 56, 60 n = 2 @@ -3858,7 +3858,7 @@ name = vqshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 @@ -3903,7 +3903,7 @@ name = vqshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 @@ -3948,7 +3948,7 @@ name = vqshrun_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vqshrun_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vqshrun_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 @@ -4312,7 +4312,7 @@ name = vrshrn_high noq-n-suffix constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vrshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vrshrn_n-noqself-::, b}, {asc-0-out_len} a = 0, 1, 8, 9, 8, 9, 10, 11 b = 32, 36, 40, 44, 48, 52, 56, 60 n = 2 @@ -4466,7 +4466,7 @@ name = vshll_high_n no-q constn = N multi_fn = static_assert-N-0-bits -multi_fn = simd_shuffle-out_len-noext, b:half, a, a, {asc-halflen-halflen} +multi_fn = simd_shuffle-out_len-!, b:half, a, a, {asc-halflen-halflen} multi_fn = vshll_n-noqself-::, b a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8 n = 2 @@ -4513,7 +4513,7 @@ name = vshrn_high_n no-q constn = N multi_fn = static_assert-N-1-halfbits -multi_fn = simd_shuffle-out_len-noext, a, {vshrn_n-noqself-::, b}, {asc-0-out_len} +multi_fn = simd_shuffle-out_len-!, a, {vshrn_n-noqself-::, b}, {asc-0-out_len} a = 1, 2, 5, 6, 5, 6, 7, 8 b = 20, 24, 28, 32, 52, 56, 60, 64 n = 2 @@ -4555,7 +4555,7 @@ generate uint*_t, uint64x*_t /// Transpose vectors name = vtrn1 -multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 @@ -4568,7 +4568,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Transpose vectors name = vtrn1 -multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {transpose-1-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 0., 1., 4., 5., 8., 9., 12., 13. 
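Several *_high entries earlier in this spec diff differ only in which fixed index range they hand to the shuffle: asc-halflen-halflen selects the upper half of a vector (vshll_high_n, vqdmull_high), while asc-0-out_len concatenates an existing low half with a freshly computed high half (the vqrshrn_high / vshrn_high pattern). A hedged plain-array sketch of those two index shapes follows; the helper names are hypothetical and are not the generated intrinsics.

    // {asc-halflen-halflen}: indices H, H+1, ..., N-1, i.e. the upper half.
    fn high_half<const N: usize, const H: usize>(a: [u32; N]) -> [u32; H] {
        std::array::from_fn(|i| a[H + i])
    }

    // {asc-0-out_len}: indices 0, 1, ..., N-1 over the pair (lo, hi),
    // i.e. plain concatenation of two half-width vectors.
    fn concat<const H: usize, const N: usize>(lo: [u32; H], hi: [u32; H]) -> [u32; N] {
        std::array::from_fn(|i| if i < H { lo[i] } else { hi[i - H] })
    }

    fn main() {
        let a = [0, 1, 2, 3, 4, 5, 6, 7];
        assert_eq!(high_half::<8, 4>(a), [4, 5, 6, 7]);
        assert_eq!(concat::<4, 8>([0, 1, 2, 3], [4, 5, 6, 7]), a);
    }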
@@ -4581,7 +4581,7 @@ generate float32x2_t, float64x2_t /// Transpose vectors name = vtrn2 -multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 @@ -4594,7 +4594,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Transpose vectors name = vtrn2 -multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {transpose-2-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 2., 3., 6., 7., 10., 11., 14., 15. @@ -4607,7 +4607,7 @@ generate float32x2_t, float64x2_t /// Zip vectors name = vzip1 -multi_fn = simd_shuffle-in_len-noext, a, b, {zip-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len} a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 @@ -4617,7 +4617,7 @@ generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4 /// Zip vectors name = vzip1 -multi_fn = simd_shuffle-in_len-noext, a, b, {zip-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {zip-1-in_len} a = 0., 2., 4., 6., 8., 10., 12., 14. b = 1., 3., 5., 7., 9., 11., 13., 15. validate 0., 1., 2., 3., 4., 5., 6., 7. @@ -4627,7 +4627,7 @@ generate float32x2_t, float32x4_t, float64x2_t /// Zip vectors name = vzip2 -multi_fn = simd_shuffle-in_len-noext, a, b, {zip-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len} a = 0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30 b = 1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31 validate 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 @@ -4637,7 +4637,7 @@ generate int*_t, int64x2_t, uint*_t, uint64x2_t, poly8x8_t, poly8x16_t, poly16x4 /// Zip vectors name = vzip2 -multi_fn = simd_shuffle-in_len-noext, a, b, {zip-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {zip-2-in_len} a = 0., 8., 8., 10., 8., 10., 12., 14. b = 1., 9., 9., 11., 9., 11., 13., 15. validate 8., 9., 10., 11., 12., 13., 14., 15. @@ -4647,7 +4647,7 @@ generate float32x2_t, float32x4_t, float64x2_t /// Unzip vectors name = vuzp1 -multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len} a = 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0 b = 2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0 validate 1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16 @@ -4660,7 +4660,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Unzip vectors name = vuzp1 -multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-1-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {unzip-1-in_len} a = 0., 8., 1., 9., 4., 12., 5., 13. b = 1., 10., 3., 11., 6., 14., 7., 15. validate 0., 1., 1., 3., 4., 5., 6., 7. 
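The vtrn1/vtrn2, vzip1/vzip2 and vuzp1/vuzp2 entries in this stretch all reduce to fixed index patterns fed to the shuffle (the transpose-, zip- and unzip- substitutions). A small sketch of the first-half variants of those patterns, checked against the interleavings the validate lines describe; the helper functions are hypothetical and only compute index lists.

    // Index patterns for a vector of `len` lanes, where lanes 0..len come
    // from `a` and lanes len..2*len from `b`.
    fn trn1_indices(len: usize) -> Vec<u32> {
        // Even lanes of a interleaved with even lanes of b.
        (0..len / 2)
            .flat_map(|i| [(2 * i) as u32, (2 * i + len) as u32])
            .collect()
    }

    fn zip1_indices(len: usize) -> Vec<u32> {
        // Low half of a interleaved with low half of b.
        (0..len / 2).flat_map(|i| [i as u32, (i + len) as u32]).collect()
    }

    fn uzp1_indices(len: usize) -> Vec<u32> {
        // Even-numbered lanes of the concatenation of a and b.
        (0..len).map(|i| (2 * i) as u32).collect()
    }

    fn main() {
        assert_eq!(trn1_indices(4), [0u32, 4, 2, 6]);
        assert_eq!(zip1_indices(4), [0u32, 4, 1, 5]);
        assert_eq!(uzp1_indices(4), [0u32, 2, 4, 6]);
    }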
@@ -4673,7 +4673,7 @@ generate float32x2_t, float64x2_t /// Unzip vectors name = vuzp2 -multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len} a = 0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24 b = 0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32 validate 17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32 @@ -4686,7 +4686,7 @@ generate int32x2_t, int64x2_t, uint32x2_t, uint64x2_t, poly64x2_t /// Unzip vectors name = vuzp2 -multi_fn = simd_shuffle-in_len-noext, a, b, {unzip-2-in_len} +multi_fn = simd_shuffle-in_len-!, a, b, {unzip-2-in_len} a = 0., 8., 1., 9., 4., 12., 5., 13. b = 2., 9., 3., 11., 6., 14., 7., 15. validate 8., 9., 9., 11., 12., 13., 14., 15. diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index ffa85f28f5..36ea5ea796 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -988,6 +988,17 @@ fn gen_aarch64( ); } }; + let const_declare = if let Some(constn) = constn { + if constn.contains(":") { + let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect(); + assert_eq!(constns.len(), 2); + format!(r#""#, constns[0], constns[1]) + } else { + format!(r#""#, constn) + } + } else { + String::new() + }; let multi_calls = if !multi_fn.is_empty() { let mut calls = String::new(); for i in 0..multi_fn.len() { @@ -997,6 +1008,7 @@ fn gen_aarch64( calls.push_str(&get_call( &multi_fn[i], current_name, + &const_declare, in_t, out_t, fixed, @@ -1007,17 +1019,6 @@ fn gen_aarch64( } else { String::new() }; - let const_declare = if let Some(constn) = constn { - if constn.contains(":") { - let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect(); - assert_eq!(constns.len(), 2); - format!(r#""#, constns[0], constns[1]) - } else { - format!(r#""#, constn) - } - } else { - String::new() - }; let const_assert = if let Some(constn) = constn { if constn.contains(":") { let constns: Vec<_> = constn.split(':').map(|v| v.to_string()).collect(); @@ -1582,6 +1583,11 @@ fn gen_arm( )); } }; + let const_declare = if let Some(constn) = constn { + format!(r#""#, constn) + } else { + String::new() + }; let multi_calls = if !multi_fn.is_empty() { let mut calls = String::new(); for i in 0..multi_fn.len() { @@ -1591,6 +1597,7 @@ fn gen_arm( calls.push_str(&get_call( &multi_fn[i], current_name, + &const_declare, in_t, out_t, fixed, @@ -1601,11 +1608,6 @@ fn gen_arm( } else { String::new() }; - let const_declare = if let Some(constn) = constn { - format!(r#""#, constn) - } else { - String::new() - }; let const_assert = if let Some(constn) = constn { format!( r#", {} = {}"#, @@ -2003,6 +2005,7 @@ fn expand_intrinsic(intr: &str, t: &str) -> String { fn get_call( in_str: &str, current_name: &str, + const_declare: &str, in_t: &[&str; 3], out_t: &str, fixed: &Vec, @@ -2041,7 +2044,7 @@ fn get_call( "halflen" => type_len(in_t[1]) / 2, _ => 0, }; - let mut s = String::from("["); + let mut s = format!("{} [", const_declare); for i in 0..len { if i != 0 { s.push_str(", "); @@ -2084,7 +2087,7 @@ fn get_call( "in0_len" => type_len(in_t[0]), _ => 0, }; - let mut s = String::from("["); + let mut s = format!("{} [", const_declare); for i in 0..len { if i != 0 { s.push_str(", "); @@ -2167,7 +2170,7 @@ fn get_call( let sub_match = format!( " {} => {},\n", i, - get_call(&sub_call, current_name, in_t, out_t, fixed, Some(i as i32)) + get_call(&sub_call, current_name, const_declare, in_t, out_t, fixed, Some(i as i32)) ); call.push_str(&sub_match); } @@ -2210,6 
+2213,7 @@ fn get_call(
                 let sub_call = get_call(
                     &sub_fn[1..sub_fn.len() - 1],
                     current_name,
+                    const_declare,
                     in_t,
                     out_t,
                     fixed,

From 0b7335fb3658687f59d670828a174c9e5a8b4c3e Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Tue, 11 May 2021 13:52:50 +0200
Subject: [PATCH 5/5] fmt

---
 crates/stdarch-gen/src/main.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index 36ea5ea796..03e6f409ab 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -2170,7 +2170,15 @@ fn get_call(
         let sub_match = format!(
             " {} => {},\n",
             i,
-            get_call(&sub_call, current_name, const_declare, in_t, out_t, fixed, Some(i as i32))
+            get_call(
+                &sub_call,
+                current_name,
+                const_declare,
+                in_t,
+                out_t,
+                fixed,
+                Some(i as i32)
+            )
         );
         call.push_str(&sub_match);
     }
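The main.rs hunks above move the const_declare computation ahead of multi_calls and thread it through every get_call invocation, so the index arrays that get_call builds can be prefixed with a const-generic parameter list (the format!("{} [", const_declare) lines). A trimmed-down sketch of that emission step follows; the declaration string "<const LANE: i32>" used in it is an assumption standing in for whatever the real const_declare contains.

    // Hypothetical, trimmed-down analogue of the index-array emission in
    // get_call: a (possibly empty) const-generic declaration is prefixed
    // onto the emitted array literal.
    fn emit_dup_indices(const_declare: &str, len: usize, lane: &str) -> String {
        let mut s = format!("{} [", const_declare);
        for i in 0..len {
            if i != 0 {
                s.push_str(", ");
            }
            s.push_str(&format!("{} as u32", lane));
        }
        s.push(']');
        s
    }

    fn main() {
        // With an empty declaration the output matches the arrays in the
        // generated.rs hunks, e.g. simd_shuffle2!(b, b, [LANE as u32, LANE as u32]).
        assert_eq!(emit_dup_indices("", 2, "LANE"), " [LANE as u32, LANE as u32]");
        // An assumed declaration string is emitted in front of the array instead.
        assert_eq!(
            emit_dup_indices("<const LANE: i32>", 2, "LANE"),
            "<const LANE: i32> [LANE as u32, LANE as u32]"
        );
    }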