From ff238f3135246fe1ed49f1cfc2bd79012ec5b877 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Fri, 18 Feb 2022 17:45:35 +0000 Subject: [PATCH 1/2] Stabilize vget_low_s8 on AArch64 This was missed when most of the AArch64 intrinsics were stabilized. --- crates/core_arch/src/arm_shared/neon/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 7d170a00d1..952d1ca2e3 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -4989,6 +4989,10 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t { #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "vget_low_s8", since = "1.60.0") +)] pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } From 49f501648a59782b112cca028d8154efd55c5e04 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 19 Feb 2022 18:54:52 +0000 Subject: [PATCH 2/2] Updates for LLVM 14 on nightly --- .../core_arch/src/aarch64/neon/generated.rs | 84 ++++++++++++------- .../src/arm_shared/neon/generated.rs | 8 +- crates/intrinsic-test/missing_aarch64.txt | 23 ----- crates/stdarch-gen/neon.spec | 11 +-- 4 files changed, 66 insertions(+), 60 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index e5df1b72c0..010c5de5dc 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -1564,7 +1564,7 @@ pub unsafe fn vclezd_f64(a: f64) -> u64 { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltz_s8(a: 
int8x8_t) -> uint8x8_t { let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); @@ -1574,7 +1574,7 @@ pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t { let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); @@ -1584,7 +1584,7 @@ pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t { let b: i16x4 = i16x4::new(0, 0, 0, 0); @@ -1594,7 +1594,7 @@ pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t { let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); @@ -1604,7 +1604,7 @@ pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t { let b: i32x2 = i32x2::new(0, 0); @@ -1614,7 +1614,7 @@ pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t { let b: 
i32x4 = i32x4::new(0, 0, 0, 0); @@ -1624,7 +1624,7 @@ pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t { let b: i64x1 = i64x1::new(0); @@ -1634,7 +1634,7 @@ pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t { /// Compare signed less than zero #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshr))] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t { let b: i64x2 = i64x2::new(0, 0); @@ -2714,7 +2714,7 @@ pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcopyq_lane_s64(a: int64x2_t, b: int64x1_t) -> int64x2_t { @@ -2731,7 +2731,7 @@ pub unsafe fn vcopyq_lane_s64(a: int64x2_t, /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t { @@ -2748,7 +2748,7 @@ pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] 
#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcopyq_lane_p64(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t { @@ -2784,7 +2784,7 @@ pub unsafe fn vcopyq_lane_f32(a: float32x4_t /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcopyq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { @@ -9183,7 +9183,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vmaxq_f64_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnm))] @@ -9197,7 +9197,7 @@ pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { vmaxnm_f64_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxnm))] @@ -9379,7 +9379,7 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vminq_f64_(a, b) } -/// Floating-point Minimun Number (vector) +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnm))] @@ -9393,7 +9393,7 @@ pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { vminnm_f64_(a, b) } -/// Floating-point Minimun Number (vector) +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminnm))] @@ -10535,7 +10535,12 @@ pub unsafe fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 #[target_feature(enable = "rdm")] #[cfg_attr(test, assert_instr(sqrdmlah))] pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: 
int16x4_t, c: int16x4_t) -> int16x4_t { - vqadd_s16(a, vqrdmulh_s16(b, c)) + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")] + fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + vqrdmlah_s16_(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10543,7 +10548,12 @@ pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_ #[target_feature(enable = "rdm")] #[cfg_attr(test, assert_instr(sqrdmlah))] pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - vqaddq_s16(a, vqrdmulhq_s16(b, c)) + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")] + fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + vqrdmlahq_s16_(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10551,7 +10561,12 @@ pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8 #[target_feature(enable = "rdm")] #[cfg_attr(test, assert_instr(sqrdmlah))] pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - vqadd_s32(a, vqrdmulh_s32(b, c)) + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")] + fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + vqrdmlah_s32_(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10559,7 +10574,12 @@ pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_ #[target_feature(enable = "rdm")] #[cfg_attr(test, assert_instr(sqrdmlah))] pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - vqaddq_s32(a, vqrdmulhq_s32(b, c)) + #[allow(improper_ctypes)] + extern 
"unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")] + fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + vqrdmlahq_s32_(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10591,7 +10611,8 @@ pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_imm2!(LANE); - vqadd_s16(a, vqrdmulh_lane_s16::(b, c)) + let c: int16x4_t = simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlah_s16(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10601,7 +10622,8 @@ pub unsafe fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_imm3!(LANE); - vqadd_s16(a, vqrdmulh_laneq_s16::(b, c)) + let c: int16x4_t = simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlah_s16(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10611,7 +10633,8 @@ pub unsafe fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_imm2!(LANE); - vqaddq_s16(a, vqrdmulhq_lane_s16::(b, c)) + let c: int16x8_t = simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s16(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10621,7 +10644,8 @@ pub unsafe fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> 
int16x8_t { static_assert_imm3!(LANE); - vqaddq_s16(a, vqrdmulhq_laneq_s16::(b, c)) + let c: int16x8_t = simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s16(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10631,7 +10655,8 @@ pub unsafe fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_imm1!(LANE); - vqadd_s32(a, vqrdmulh_lane_s32::(b, c)) + let c: int32x2_t = simd_shuffle2!(c, c, [LANE as u32, LANE as u32]); + vqrdmlah_s32(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10641,7 +10666,8 @@ pub unsafe fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_imm2!(LANE); - vqadd_s32(a, vqrdmulh_laneq_s32::(b, c)) + let c: int32x2_t = simd_shuffle2!(c, c, [LANE as u32, LANE as u32]); + vqrdmlah_s32(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10651,7 +10677,8 @@ pub unsafe fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_imm1!(LANE); - vqaddq_s32(a, vqrdmulhq_lane_s32::(b, c)) + let c: int32x4_t = simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s32(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half @@ -10661,7 +10688,8 @@ pub unsafe fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: #[rustc_legacy_const_generics(3)] pub unsafe fn vqrdmlahq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_imm2!(LANE); - vqaddq_s32(a, 
vqrdmulhq_laneq_s32::(b, c)) + let c: int32x4_t = simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s32(a, b, c) } /// Signed saturating rounding doubling multiply accumulate returning high half diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 7837307e44..1f7e41975b 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -16535,7 +16535,7 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vmaxq_f32_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] @@ -16552,7 +16552,7 @@ pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vmaxnm_f32_(a, b) } -/// Floating-point Maximun Number (vector) +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] @@ -16807,7 +16807,7 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vminq_f32_(a, b) } -/// Floating-point Minimun Number (vector) +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] @@ -16824,7 +16824,7 @@ pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vminnm_f32_(a, b) } -/// Floating-point Minimun Number (vector) +/// Floating-point Minimum Number (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt index b6ba2eab05..56ec274b5a 100644 --- a/crates/intrinsic-test/missing_aarch64.txt +++ 
b/crates/intrinsic-test/missing_aarch64.txt @@ -108,26 +108,3 @@ vqshluq_n_s32 vqshluq_n_s64 vqshluq_n_s8 vqshlus_n_s32 - -# These tests produce a different result from C but only in debug builds of -# stdarch. This likely both a bug in stdarch (expanding to a different LLVM -# intrinsic) and a bug in LLVM (incorrect optimization changing the behavior of -# integer operations). -vqrdmlah_lane_s16 -vqrdmlah_lane_s32 -vqrdmlah_laneq_s16 -vqrdmlah_laneq_s32 -vqrdmlah_s16 -vqrdmlah_s32 -vqrdmlahh_lane_s16 -vqrdmlahh_laneq_s16 -vqrdmlahh_s16 -vqrdmlahq_lane_s16 -vqrdmlahq_lane_s32 -vqrdmlahq_laneq_s16 -vqrdmlahq_laneq_s32 -vqrdmlahq_s16 -vqrdmlahq_s32 -vqrdmlahs_lane_s32 -vqrdmlahs_laneq_s32 -vqrdmlahs_s32 diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index c4a720aa35..b3eb07e997 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -846,7 +846,7 @@ a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0 fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE -aarch64 = sshr +aarch64 = cmlt generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t /// Floating-point compare less than zero @@ -1069,7 +1069,7 @@ b = MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 n = 1:0 validate 1, MAX -aarch64 = zip1 +aarch64 = mov generate int64x2_t:int64x1_t:int64x2_t, uint64x2_t:uint64x1_t:uint64x2_t, poly64x2_t:poly64x1_t:poly64x2_t /// Insert vector element from another vector element @@ -1087,7 +1087,7 @@ validate 1., 0.5, 3., 4. 
aarch64 = mov generate float32x4_t:float32x2_t:float32x4_t -aarch64 = zip1 +aarch64 = mov generate float64x2_t:float64x1_t:float64x2_t /// Insert vector element from another vector element @@ -5527,13 +5527,13 @@ generate i16:int16x4_t:i16, i16:int16x8_t:i16, i32:int32x2_t:i32, i32:int32x4_t: /// Signed saturating rounding doubling multiply accumulate returning high half name = vqrdmlah -multi_fn = vqadd-out-noext, a, {vqrdmulh-out-noext, b, c} a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 2, 2, 2, 2, 2, 2, 2, 2 validate 3, 3, 3, 3, 3, 3, 3, 3 aarch64 = sqrdmlah +link-aarch64 = sqrdmlah._EXT_ target = rdm generate int16x4_t, int16x8_t, int32x2_t, int32x4_t @@ -5557,7 +5557,8 @@ name = vqrdmlah in2-lane-suffixes constn = LANE multi_fn = static_assert_imm-in2_exp_len-LANE -multi_fn = vqadd-self-noext, a, {vqrdmulh-in2lane-::<LANE>, b, c} +multi_fn = simd_shuffle-out_len-!, c:out_t, c, c, {dup-out_len-LANE as u32} +multi_fn = vqrdmlah-out-noext, a, b, c a = 1, 1, 1, 1, 1, 1, 1, 1 b = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX c = 0, 2, 0, 0, 0, 0, 0, 0