@@ -9639,58 +9639,176 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32
9639
9639
9640
9640
/// Signed saturating rounding doubling multiply accumulate returning high half
9641
9641
#[inline]
9642
- #[target_feature(enable = "neon")]
9643
- #[cfg_attr(test, assert_instr(sqrdmulh))]
9642
+ #[target_feature(enable = "rdm")]
9643
+ #[cfg_attr(test, assert_instr(sqrdmlah))]
9644
+ pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
9645
+ vqadd_s16(a, vqrdmulh_s16(b, c))
9646
+ }
9647
+
9648
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9649
+ #[inline]
9650
+ #[target_feature(enable = "rdm")]
9651
+ #[cfg_attr(test, assert_instr(sqrdmlah))]
9652
+ pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
9653
+ vqaddq_s16(a, vqrdmulhq_s16(b, c))
9654
+ }
9655
+
9656
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9657
+ #[inline]
9658
+ #[target_feature(enable = "rdm")]
9659
+ #[cfg_attr(test, assert_instr(sqrdmlah))]
9660
+ pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
9661
+ vqadd_s32(a, vqrdmulh_s32(b, c))
9662
+ }
9663
+
9664
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9665
+ #[inline]
9666
+ #[target_feature(enable = "rdm")]
9667
+ #[cfg_attr(test, assert_instr(sqrdmlah))]
9668
+ pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
9669
+ vqaddq_s32(a, vqrdmulhq_s32(b, c))
9670
+ }
9671
+
9672
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9673
+ #[inline]
9674
+ #[target_feature(enable = "rdm")]
9675
+ #[cfg_attr(test, assert_instr(sqrdmlah))]
9644
9676
pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
9645
- vqaddh_s16(a, vqrdmulhh_s16(b, c))
9677
+ let a: int16x4_t = vdup_n_s16(a);
9678
+ let b: int16x4_t = vdup_n_s16(b);
9679
+ let c: int16x4_t = vdup_n_s16(c);
9680
+ simd_extract(vqrdmlah_s16(a, b, c), 0)
9646
9681
}
9647
9682
9648
9683
/// Signed saturating rounding doubling multiply accumulate returning high half
9649
9684
#[inline]
9650
- #[target_feature(enable = "neon ")]
9651
- #[cfg_attr(test, assert_instr(sqrdmulh ))]
9685
+ #[target_feature(enable = "rdm ")]
9686
+ #[cfg_attr(test, assert_instr(sqrdmlah ))]
9652
9687
pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
9653
- vqadds_s32(a, vqrdmulhs_s32(b, c))
9688
+ let a: int32x2_t = vdup_n_s32(a);
9689
+ let b: int32x2_t = vdup_n_s32(b);
9690
+ let c: int32x2_t = vdup_n_s32(c);
9691
+ simd_extract(vqrdmlah_s32(a, b, c), 0)
9654
9692
}
9655
9693
9656
9694
/// Signed saturating rounding doubling multiply accumulate returning high half
9657
9695
#[inline]
9658
- #[target_feature(enable = "neon")]
9659
- #[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
9696
+ #[target_feature(enable = "rdm")]
9697
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9698
+ #[rustc_legacy_const_generics(3)]
9699
+ pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
9700
+ static_assert_imm2!(LANE);
9701
+ vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
9702
+ }
9703
+
9704
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9705
+ #[inline]
9706
+ #[target_feature(enable = "rdm")]
9707
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9708
+ #[rustc_legacy_const_generics(3)]
9709
+ pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
9710
+ static_assert_imm3!(LANE);
9711
+ vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
9712
+ }
9713
+
9714
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9715
+ #[inline]
9716
+ #[target_feature(enable = "rdm")]
9717
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9718
+ #[rustc_legacy_const_generics(3)]
9719
+ pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
9720
+ static_assert_imm2!(LANE);
9721
+ vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
9722
+ }
9723
+
9724
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9725
+ #[inline]
9726
+ #[target_feature(enable = "rdm")]
9727
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9728
+ #[rustc_legacy_const_generics(3)]
9729
+ pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
9730
+ static_assert_imm3!(LANE);
9731
+ vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
9732
+ }
9733
+
9734
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9735
+ #[inline]
9736
+ #[target_feature(enable = "rdm")]
9737
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9738
+ #[rustc_legacy_const_generics(3)]
9739
+ pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
9740
+ static_assert_imm1!(LANE);
9741
+ vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
9742
+ }
9743
+
9744
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9745
+ #[inline]
9746
+ #[target_feature(enable = "rdm")]
9747
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9748
+ #[rustc_legacy_const_generics(3)]
9749
+ pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
9750
+ static_assert_imm2!(LANE);
9751
+ vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
9752
+ }
9753
+
9754
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9755
+ #[inline]
9756
+ #[target_feature(enable = "rdm")]
9757
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9758
+ #[rustc_legacy_const_generics(3)]
9759
+ pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
9760
+ static_assert_imm1!(LANE);
9761
+ vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
9762
+ }
9763
+
9764
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9765
+ #[inline]
9766
+ #[target_feature(enable = "rdm")]
9767
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9768
+ #[rustc_legacy_const_generics(3)]
9769
+ pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
9770
+ static_assert_imm2!(LANE);
9771
+ vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
9772
+ }
9773
+
9774
+ /// Signed saturating rounding doubling multiply accumulate returning high half
9775
+ #[inline]
9776
+ #[target_feature(enable = "rdm")]
9777
+ #[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
9660
9778
#[rustc_legacy_const_generics(3)]
9661
9779
pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
9662
9780
static_assert_imm2!(LANE);
9663
- vqaddh_s16 (a, vqrdmulhh_lane_s16::<LANE>( b, c ))
9781
+ vqrdmlahh_s16 (a, b, simd_extract(c, LANE as u32 ))
9664
9782
}
9665
9783
9666
9784
/// Signed saturating rounding doubling multiply accumulate returning high half
9667
9785
#[inline]
9668
- #[target_feature(enable = "neon ")]
9669
- #[cfg_attr(test, assert_instr(sqrdmulh , LANE = 1))]
9786
+ #[target_feature(enable = "rdm ")]
9787
+ #[cfg_attr(test, assert_instr(sqrdmlah , LANE = 1))]
9670
9788
#[rustc_legacy_const_generics(3)]
9671
9789
pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
9672
9790
static_assert_imm3!(LANE);
9673
- vqaddh_s16 (a, vqrdmulhh_laneq_s16::<LANE>( b, c ))
9791
+ vqrdmlahh_s16 (a, b, simd_extract(c, LANE as u32 ))
9674
9792
}
9675
9793
9676
9794
/// Signed saturating rounding doubling multiply accumulate returning high half
9677
9795
#[inline]
9678
- #[target_feature(enable = "neon ")]
9679
- #[cfg_attr(test, assert_instr(sqrdmulh , LANE = 1))]
9796
+ #[target_feature(enable = "rdm ")]
9797
+ #[cfg_attr(test, assert_instr(sqrdmlah , LANE = 1))]
9680
9798
#[rustc_legacy_const_generics(3)]
9681
9799
pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
9682
9800
static_assert_imm1!(LANE);
9683
- vqadds_s32 (a, vqrdmulhs_lane_s32::<LANE>( b, c ))
9801
+ vqrdmlahs_s32 (a, b, simd_extract(c, LANE as u32 ))
9684
9802
}
9685
9803
9686
9804
/// Signed saturating rounding doubling multiply accumulate returning high half
9687
9805
#[inline]
9688
- #[target_feature(enable = "neon ")]
9689
- #[cfg_attr(test, assert_instr(sqrdmulh , LANE = 1))]
9806
+ #[target_feature(enable = "rdm ")]
9807
+ #[cfg_attr(test, assert_instr(sqrdmlah , LANE = 1))]
9690
9808
#[rustc_legacy_const_generics(3)]
9691
9809
pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
9692
9810
static_assert_imm2!(LANE);
9693
- vqadds_s32 (a, vqrdmulhs_laneq_s32::<LANE>( b, c ))
9811
+ vqrdmlahs_s32 (a, b, simd_extract(c, LANE as u32 ))
9694
9812
}
9695
9813
9696
9814
/// Signed saturating rounding doubling multiply subtract returning high half
@@ -20709,6 +20827,46 @@ mod test {
20709
20827
assert_eq!(r, e);
20710
20828
}
20711
20829
20830
+ #[simd_test(enable = "neon")]
20831
+ unsafe fn test_vqrdmlah_s16() {
20832
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
20833
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20834
+ let c: i16x4 = i16x4::new(2, 2, 2, 2);
20835
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
20836
+ let r: i16x4 = transmute(vqrdmlah_s16(transmute(a), transmute(b), transmute(c)));
20837
+ assert_eq!(r, e);
20838
+ }
20839
+
20840
+ #[simd_test(enable = "neon")]
20841
+ unsafe fn test_vqrdmlahq_s16() {
20842
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20843
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20844
+ let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
20845
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20846
+ let r: i16x8 = transmute(vqrdmlahq_s16(transmute(a), transmute(b), transmute(c)));
20847
+ assert_eq!(r, e);
20848
+ }
20849
+
20850
+ #[simd_test(enable = "neon")]
20851
+ unsafe fn test_vqrdmlah_s32() {
20852
+ let a: i32x2 = i32x2::new(1, 1);
20853
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20854
+ let c: i32x2 = i32x2::new(2, 2);
20855
+ let e: i32x2 = i32x2::new(3, 3);
20856
+ let r: i32x2 = transmute(vqrdmlah_s32(transmute(a), transmute(b), transmute(c)));
20857
+ assert_eq!(r, e);
20858
+ }
20859
+
20860
+ #[simd_test(enable = "neon")]
20861
+ unsafe fn test_vqrdmlahq_s32() {
20862
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
20863
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20864
+ let c: i32x4 = i32x4::new(2, 2, 2, 2);
20865
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
20866
+ let r: i32x4 = transmute(vqrdmlahq_s32(transmute(a), transmute(b), transmute(c)));
20867
+ assert_eq!(r, e);
20868
+ }
20869
+
20712
20870
#[simd_test(enable = "neon")]
20713
20871
unsafe fn test_vqrdmlahh_s16() {
20714
20872
let a: i16 = 1;
@@ -20729,6 +20887,86 @@ mod test {
20729
20887
assert_eq!(r, e);
20730
20888
}
20731
20889
20890
+ #[simd_test(enable = "neon")]
20891
+ unsafe fn test_vqrdmlah_lane_s16() {
20892
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
20893
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20894
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
20895
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
20896
+ let r: i16x4 = transmute(vqrdmlah_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
20897
+ assert_eq!(r, e);
20898
+ }
20899
+
20900
+ #[simd_test(enable = "neon")]
20901
+ unsafe fn test_vqrdmlah_laneq_s16() {
20902
+ let a: i16x4 = i16x4::new(1, 1, 1, 1);
20903
+ let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20904
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
20905
+ let e: i16x4 = i16x4::new(3, 3, 3, 3);
20906
+ let r: i16x4 = transmute(vqrdmlah_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
20907
+ assert_eq!(r, e);
20908
+ }
20909
+
20910
+ #[simd_test(enable = "neon")]
20911
+ unsafe fn test_vqrdmlahq_lane_s16() {
20912
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20913
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20914
+ let c: i16x4 = i16x4::new(0, 2, 0, 0);
20915
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20916
+ let r: i16x8 = transmute(vqrdmlahq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
20917
+ assert_eq!(r, e);
20918
+ }
20919
+
20920
+ #[simd_test(enable = "neon")]
20921
+ unsafe fn test_vqrdmlahq_laneq_s16() {
20922
+ let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
20923
+ let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
20924
+ let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
20925
+ let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
20926
+ let r: i16x8 = transmute(vqrdmlahq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
20927
+ assert_eq!(r, e);
20928
+ }
20929
+
20930
+ #[simd_test(enable = "neon")]
20931
+ unsafe fn test_vqrdmlah_lane_s32() {
20932
+ let a: i32x2 = i32x2::new(1, 1);
20933
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20934
+ let c: i32x2 = i32x2::new(0, 2);
20935
+ let e: i32x2 = i32x2::new(3, 3);
20936
+ let r: i32x2 = transmute(vqrdmlah_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
20937
+ assert_eq!(r, e);
20938
+ }
20939
+
20940
+ #[simd_test(enable = "neon")]
20941
+ unsafe fn test_vqrdmlah_laneq_s32() {
20942
+ let a: i32x2 = i32x2::new(1, 1);
20943
+ let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20944
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
20945
+ let e: i32x2 = i32x2::new(3, 3);
20946
+ let r: i32x2 = transmute(vqrdmlah_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
20947
+ assert_eq!(r, e);
20948
+ }
20949
+
20950
+ #[simd_test(enable = "neon")]
20951
+ unsafe fn test_vqrdmlahq_lane_s32() {
20952
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
20953
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20954
+ let c: i32x2 = i32x2::new(0, 2);
20955
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
20956
+ let r: i32x4 = transmute(vqrdmlahq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
20957
+ assert_eq!(r, e);
20958
+ }
20959
+
20960
+ #[simd_test(enable = "neon")]
20961
+ unsafe fn test_vqrdmlahq_laneq_s32() {
20962
+ let a: i32x4 = i32x4::new(1, 1, 1, 1);
20963
+ let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
20964
+ let c: i32x4 = i32x4::new(0, 2, 0, 0);
20965
+ let e: i32x4 = i32x4::new(3, 3, 3, 3);
20966
+ let r: i32x4 = transmute(vqrdmlahq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
20967
+ assert_eq!(r, e);
20968
+ }
20969
+
20732
20970
#[simd_test(enable = "neon")]
20733
20971
unsafe fn test_vqrdmlahh_lane_s16() {
20734
20972
let a: i16 = 1;
0 commit comments