Skip to content

Commit 219cf81

Browse files
Correct the vqrdmlah intrinsics. (#1246)
1 parent 2eb8706 commit 219cf81

File tree

4 files changed

+274
-282
lines changed

4 files changed

+274
-282
lines changed

Diff for: crates/core_arch/src/aarch64/neon/generated.rs

+256 -18
Original file line number | Diff line number | Diff line change
@@ -9639,58 +9639,176 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32
96399639

96409640
/// Signed saturating rounding doubling multiply accumulate returning high half
96419641
#[inline]
9642-
#[target_feature(enable = "neon")]
9643-
#[cfg_attr(test, assert_instr(sqrdmulh))]
9642+
#[target_feature(enable = "rdm")]
9643+
#[cfg_attr(test, assert_instr(sqrdmlah))]
9644+
pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
9645+
vqadd_s16(a, vqrdmulh_s16(b, c))
9646+
}
9647+
9648+
/// Signed saturating rounding doubling multiply accumulate returning high half
9649+
#[inline]
9650+
#[target_feature(enable = "rdm")]
9651+
#[cfg_attr(test, assert_instr(sqrdmlah))]
9652+
pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
9653+
vqaddq_s16(a, vqrdmulhq_s16(b, c))
9654+
}
9655+
9656+
/// Signed saturating rounding doubling multiply accumulate returning high half
9657+
#[inline]
9658+
#[target_feature(enable = "rdm")]
9659+
#[cfg_attr(test, assert_instr(sqrdmlah))]
9660+
pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
9661+
vqadd_s32(a, vqrdmulh_s32(b, c))
9662+
}
9663+
9664+
/// Signed saturating rounding doubling multiply accumulate returning high half
9665+
#[inline]
9666+
#[target_feature(enable = "rdm")]
9667+
#[cfg_attr(test, assert_instr(sqrdmlah))]
9668+
pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
9669+
vqaddq_s32(a, vqrdmulhq_s32(b, c))
9670+
}
9671+
9672+
/// Signed saturating rounding doubling multiply accumulate returning high half
9673+
#[inline]
9674+
#[target_feature(enable = "rdm")]
9675+
#[cfg_attr(test, assert_instr(sqrdmlah))]
96449676
pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
9645-
vqaddh_s16(a, vqrdmulhh_s16(b, c))
9677+
let a: int16x4_t = vdup_n_s16(a);
9678+
let b: int16x4_t = vdup_n_s16(b);
9679+
let c: int16x4_t = vdup_n_s16(c);
9680+
simd_extract(vqrdmlah_s16(a, b, c), 0)
96469681
}
96479682

96489683
/// Signed saturating rounding doubling multiply accumulate returning high half
96499684
#[inline]
9650-
#[target_feature(enable = "neon")]
9651-
#[cfg_attr(test, assert_instr(sqrdmulh))]
9685+
#[target_feature(enable = "rdm")]
9686+
#[cfg_attr(test, assert_instr(sqrdmlah))]
96529687
pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
9653-
vqadds_s32(a, vqrdmulhs_s32(b, c))
9688+
let a: int32x2_t = vdup_n_s32(a);
9689+
let b: int32x2_t = vdup_n_s32(b);
9690+
let c: int32x2_t = vdup_n_s32(c);
9691+
simd_extract(vqrdmlah_s32(a, b, c), 0)
96549692
}
96559693

96569694
/// Signed saturating rounding doubling multiply accumulate returning high half
96579695
#[inline]
9658-
#[target_feature(enable = "neon")]
9659-
#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
9696+
#[target_feature(enable = "rdm")]
9697+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9698+
#[rustc_legacy_const_generics(3)]
9699+
pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
9700+
static_assert_imm2!(LANE);
9701+
vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
9702+
}
9703+
9704+
/// Signed saturating rounding doubling multiply accumulate returning high half
9705+
#[inline]
9706+
#[target_feature(enable = "rdm")]
9707+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9708+
#[rustc_legacy_const_generics(3)]
9709+
pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
9710+
static_assert_imm3!(LANE);
9711+
vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
9712+
}
9713+
9714+
/// Signed saturating rounding doubling multiply accumulate returning high half
9715+
#[inline]
9716+
#[target_feature(enable = "rdm")]
9717+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9718+
#[rustc_legacy_const_generics(3)]
9719+
pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
9720+
static_assert_imm2!(LANE);
9721+
vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
9722+
}
9723+
9724+
/// Signed saturating rounding doubling multiply accumulate returning high half
9725+
#[inline]
9726+
#[target_feature(enable = "rdm")]
9727+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9728+
#[rustc_legacy_const_generics(3)]
9729+
pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
9730+
static_assert_imm3!(LANE);
9731+
vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
9732+
}
9733+
9734+
/// Signed saturating rounding doubling multiply accumulate returning high half
9735+
#[inline]
9736+
#[target_feature(enable = "rdm")]
9737+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9738+
#[rustc_legacy_const_generics(3)]
9739+
pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
9740+
static_assert_imm1!(LANE);
9741+
vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
9742+
}
9743+
9744+
/// Signed saturating rounding doubling multiply accumulate returning high half
9745+
#[inline]
9746+
#[target_feature(enable = "rdm")]
9747+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9748+
#[rustc_legacy_const_generics(3)]
9749+
pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
9750+
static_assert_imm2!(LANE);
9751+
vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
9752+
}
9753+
9754+
/// Signed saturating rounding doubling multiply accumulate returning high half
9755+
#[inline]
9756+
#[target_feature(enable = "rdm")]
9757+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9758+
#[rustc_legacy_const_generics(3)]
9759+
pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
9760+
static_assert_imm1!(LANE);
9761+
vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
9762+
}
9763+
9764+
/// Signed saturating rounding doubling multiply accumulate returning high half
9765+
#[inline]
9766+
#[target_feature(enable = "rdm")]
9767+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9768+
#[rustc_legacy_const_generics(3)]
9769+
pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
9770+
static_assert_imm2!(LANE);
9771+
vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
9772+
}
9773+
9774+
/// Signed saturating rounding doubling multiply accumulate returning high half
9775+
#[inline]
9776+
#[target_feature(enable = "rdm")]
9777+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
96609778
#[rustc_legacy_const_generics(3)]
96619779
pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
96629780
static_assert_imm2!(LANE);
9663-
vqaddh_s16(a, vqrdmulhh_lane_s16::<LANE>(b, c))
9781+
vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
96649782
}
96659783

96669784
/// Signed saturating rounding doubling multiply accumulate returning high half
96679785
#[inline]
9668-
#[target_feature(enable = "neon")]
9669-
#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
9786+
#[target_feature(enable = "rdm")]
9787+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
96709788
#[rustc_legacy_const_generics(3)]
96719789
pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
96729790
static_assert_imm3!(LANE);
9673-
vqaddh_s16(a, vqrdmulhh_laneq_s16::<LANE>(b, c))
9791+
vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
96749792
}
96759793

96769794
/// Signed saturating rounding doubling multiply accumulate returning high half
96779795
#[inline]
9678-
#[target_feature(enable = "neon")]
9679-
#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
9796+
#[target_feature(enable = "rdm")]
9797+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
96809798
#[rustc_legacy_const_generics(3)]
96819799
pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
96829800
static_assert_imm1!(LANE);
9683-
vqadds_s32(a, vqrdmulhs_lane_s32::<LANE>(b, c))
9801+
vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
96849802
}
96859803

96869804
/// Signed saturating rounding doubling multiply accumulate returning high half
96879805
#[inline]
9688-
#[target_feature(enable = "neon")]
9689-
#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
9806+
#[target_feature(enable = "rdm")]
9807+
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
96909808
#[rustc_legacy_const_generics(3)]
96919809
pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
96929810
static_assert_imm2!(LANE);
9693-
vqadds_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
9811+
vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
96949812
}
96959813

96969814
/// Signed saturating rounding doubling multiply subtract returning high half
@@ -20709,6 +20827,46 @@ mod test {
2070920827
assert_eq!(r, e);
2071020828
}
2071120829

20830+
#[simd_test(enable = "neon")]
20831+
unsafe fn test_vqrdmlah_s16() {
20832+
let a: i16x4 = i16x4::new(1, 1, 1, 1);
20833+
let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20834+
let c: i16x4 = i16x4::new(2, 2, 2, 2);
20835+
let e: i16x4 = i16x4::new(3, 3, 3, 3);
20836+
let r: i16x4 = transmute(vqrdmlah_s16(transmute(a), transmute(b), transmute(c)));
20837+
assert_eq!(r, e);
20838+
}
20839+
20840+
#[simd_test(enable = "neon")]
20841+
unsafe fn test_vqrdmlahq_s16() {
20842+
let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20843+
let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20844+
let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
20845+
let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20846+
let r: i16x8 = transmute(vqrdmlahq_s16(transmute(a), transmute(b), transmute(c)));
20847+
assert_eq!(r, e);
20848+
}
20849+
20850+
#[simd_test(enable = "neon")]
20851+
unsafe fn test_vqrdmlah_s32() {
20852+
let a: i32x2 = i32x2::new(1, 1);
20853+
let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20854+
let c: i32x2 = i32x2::new(2, 2);
20855+
let e: i32x2 = i32x2::new(3, 3);
20856+
let r: i32x2 = transmute(vqrdmlah_s32(transmute(a), transmute(b), transmute(c)));
20857+
assert_eq!(r, e);
20858+
}
20859+
20860+
#[simd_test(enable = "neon")]
20861+
unsafe fn test_vqrdmlahq_s32() {
20862+
let a: i32x4 = i32x4::new(1, 1, 1, 1);
20863+
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20864+
let c: i32x4 = i32x4::new(2, 2, 2, 2);
20865+
let e: i32x4 = i32x4::new(3, 3, 3, 3);
20866+
let r: i32x4 = transmute(vqrdmlahq_s32(transmute(a), transmute(b), transmute(c)));
20867+
assert_eq!(r, e);
20868+
}
20869+
2071220870
#[simd_test(enable = "neon")]
2071320871
unsafe fn test_vqrdmlahh_s16() {
2071420872
let a: i16 = 1;
@@ -20729,6 +20887,86 @@ mod test {
2072920887
assert_eq!(r, e);
2073020888
}
2073120889

20890+
#[simd_test(enable = "neon")]
20891+
unsafe fn test_vqrdmlah_lane_s16() {
20892+
let a: i16x4 = i16x4::new(1, 1, 1, 1);
20893+
let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20894+
let c: i16x4 = i16x4::new(0, 2, 0, 0);
20895+
let e: i16x4 = i16x4::new(3, 3, 3, 3);
20896+
let r: i16x4 = transmute(vqrdmlah_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
20897+
assert_eq!(r, e);
20898+
}
20899+
20900+
#[simd_test(enable = "neon")]
20901+
unsafe fn test_vqrdmlah_laneq_s16() {
20902+
let a: i16x4 = i16x4::new(1, 1, 1, 1);
20903+
let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20904+
let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
20905+
let e: i16x4 = i16x4::new(3, 3, 3, 3);
20906+
let r: i16x4 = transmute(vqrdmlah_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
20907+
assert_eq!(r, e);
20908+
}
20909+
20910+
#[simd_test(enable = "neon")]
20911+
unsafe fn test_vqrdmlahq_lane_s16() {
20912+
let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20913+
let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20914+
let c: i16x4 = i16x4::new(0, 2, 0, 0);
20915+
let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20916+
let r: i16x8 = transmute(vqrdmlahq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
20917+
assert_eq!(r, e);
20918+
}
20919+
20920+
#[simd_test(enable = "neon")]
20921+
unsafe fn test_vqrdmlahq_laneq_s16() {
20922+
let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20923+
let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20924+
let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
20925+
let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20926+
let r: i16x8 = transmute(vqrdmlahq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
20927+
assert_eq!(r, e);
20928+
}
20929+
20930+
#[simd_test(enable = "neon")]
20931+
unsafe fn test_vqrdmlah_lane_s32() {
20932+
let a: i32x2 = i32x2::new(1, 1);
20933+
let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20934+
let c: i32x2 = i32x2::new(0, 2);
20935+
let e: i32x2 = i32x2::new(3, 3);
20936+
let r: i32x2 = transmute(vqrdmlah_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
20937+
assert_eq!(r, e);
20938+
}
20939+
20940+
#[simd_test(enable = "neon")]
20941+
unsafe fn test_vqrdmlah_laneq_s32() {
20942+
let a: i32x2 = i32x2::new(1, 1);
20943+
let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20944+
let c: i32x4 = i32x4::new(0, 2, 0, 0);
20945+
let e: i32x2 = i32x2::new(3, 3);
20946+
let r: i32x2 = transmute(vqrdmlah_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
20947+
assert_eq!(r, e);
20948+
}
20949+
20950+
#[simd_test(enable = "neon")]
20951+
unsafe fn test_vqrdmlahq_lane_s32() {
20952+
let a: i32x4 = i32x4::new(1, 1, 1, 1);
20953+
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20954+
let c: i32x2 = i32x2::new(0, 2);
20955+
let e: i32x4 = i32x4::new(3, 3, 3, 3);
20956+
let r: i32x4 = transmute(vqrdmlahq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
20957+
assert_eq!(r, e);
20958+
}
20959+
20960+
#[simd_test(enable = "neon")]
20961+
unsafe fn test_vqrdmlahq_laneq_s32() {
20962+
let a: i32x4 = i32x4::new(1, 1, 1, 1);
20963+
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20964+
let c: i32x4 = i32x4::new(0, 2, 0, 0);
20965+
let e: i32x4 = i32x4::new(3, 3, 3, 3);
20966+
let r: i32x4 = transmute(vqrdmlahq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
20967+
assert_eq!(r, e);
20968+
}
20969+
2073220970
#[simd_test(enable = "neon")]
2073320971
unsafe fn test_vqrdmlahh_lane_s16() {
2073420972
let a: i16 = 1;

0 commit comments

Comments
 (0)