Stabilize vget_low_s8 on AArch64 #1284

Merged · 2 commits · Feb 19, 2022
84 changes: 56 additions & 28 deletions crates/core_arch/src/aarch64/neon/generated.rs
@@ -1564,7 +1564,7 @@ pub unsafe fn vclezd_f64(a: f64) -> u64 {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1574,7 +1574,7 @@ pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
@@ -1584,7 +1584,7 @@ pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
let b: i16x4 = i16x4::new(0, 0, 0, 0);
@@ -1594,7 +1594,7 @@ pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1604,7 +1604,7 @@ pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
let b: i32x2 = i32x2::new(0, 0);
@@ -1614,7 +1614,7 @@ pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
let b: i32x4 = i32x4::new(0, 0, 0, 0);
@@ -1624,7 +1624,7 @@ pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
let b: i64x1 = i64x1::new(0);
@@ -1634,7 +1634,7 @@ pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
/// Compare signed less than zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
#[cfg_attr(test, assert_instr(cmlt))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
let b: i64x2 = i64x2::new(0, 0);
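The `assert_instr` updates in the hunks above track a codegen change: LLVM now lowers these compare-less-than-zero intrinsics to `cmlt` rather than a sign-extending `sshr`. The two lowerings agree, because an arithmetic shift right by `lane_bits - 1` broadcasts the sign bit. A minimal scalar sketch of that equivalence (illustrative only, not code from this PR):

```rust
fn main() {
    for x in [-128i8, -1, 0, 1, 127] {
        let via_sshr = (x >> 7) as u8;                  // sshr #7: broadcast the sign bit
        let via_cmlt = if x < 0 { 0xFF_u8 } else { 0 }; // cmlt #0: all-ones if negative
        assert_eq!(via_sshr, via_cmlt);
    }
}
```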
@@ -2714,7 +2714,7 @@ pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t,
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t, b: int64x1_t) -> int64x2_t {
@@ -2731,7 +2731,7 @@ pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t,
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t {
@@ -2748,7 +2748,7 @@ pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t,
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t {
@@ -2784,7 +2784,7 @@ pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
@@ -9183,7 +9183,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vmaxq_f64_(a, b)
}

/// Floating-point Maximun Number (vector)
/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9197,7 +9197,7 @@ pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
vmaxnm_f64_(a, b)
}

/// Floating-point Maximun Number (vector)
/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9379,7 +9379,7 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vminq_f64_(a, b)
}

/// Floating-point Minimun Number (vector)
/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
@@ -9393,7 +9393,7 @@ pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
vminnm_f64_(a, b)
}

/// Floating-point Minimun Number (vector)
/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
@@ -10535,31 +10535,51 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
vqadd_s16(a, vqrdmulh_s16(b, c))
#[allow(improper_ctypes)]
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")]
fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
}
vqrdmlah_s16_(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
vqaddq_s16(a, vqrdmulhq_s16(b, c))
#[allow(improper_ctypes)]
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")]
fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
}
vqrdmlahq_s16_(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
vqadd_s32(a, vqrdmulh_s32(b, c))
#[allow(improper_ctypes)]
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")]
fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
}
vqrdmlah_s32_(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
vqaddq_s32(a, vqrdmulhq_s32(b, c))
#[allow(improper_ctypes)]
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")]
fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
}
vqrdmlahq_s32_(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10591,7 +10611,8 @@ pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
static_assert_imm2!(LANE);
vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlah_s16(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10601,7 +10622,8 @@ pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
static_assert_imm3!(LANE);
vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlah_s16(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10611,7 +10633,8 @@ pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
static_assert_imm2!(LANE);
vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlahq_s16(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10621,7 +10644,8 @@ pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
static_assert_imm3!(LANE);
vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlahq_s16(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10631,7 +10655,8 @@ pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
static_assert_imm1!(LANE);
vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
vqrdmlah_s32(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10641,7 +10666,8 @@ pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
static_assert_imm2!(LANE);
vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
vqrdmlah_s32(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10651,7 +10677,8 @@ pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
static_assert_imm1!(LANE);
vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlahq_s32(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10661,7 +10688,8 @@ pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c:
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
static_assert_imm2!(LANE);
vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
vqrdmlahq_s32(a, b, c)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
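The `vqrdmlah*` bodies in this file switch from a `vqadd`/`vqrdmulh` polyfill to the dedicated `llvm.aarch64.neon.sqrdmlah.*` intrinsics, which accumulate before saturating. A rough scalar model of the instruction's semantics on one `i16` lane (a sketch loosely following the Arm pseudocode, not code from this PR):

```rust
// Scalar model of SQRDMLAH on an i16 lane: widen, double the product,
// add the rounding constant and the shifted accumulator, saturate once.
fn sqrdmlah_h(a: i16, b: i16, c: i16) -> i16 {
    let acc = (a as i64) << 16;
    let prod = 2 * (b as i64) * (c as i64);
    let rounded = acc + prod + (1 << 15);
    (rounded >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

fn main() {
    // Small inputs: the high half of 2*b*c rounds to 0, leaving the accumulator.
    assert_eq!(sqrdmlah_h(1, 2, 3), 1);
}
```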
8 changes: 4 additions & 4 deletions crates/core_arch/src/arm_shared/neon/generated.rs
@@ -16535,7 +16535,7 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vmaxq_f32_(a, b)
}

/// Floating-point Maximun Number (vector)
/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
@@ -16552,7 +16552,7 @@ pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
vmaxnm_f32_(a, b)
}

/// Floating-point Maximun Number (vector)
/// Floating-point Maximum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
@@ -16807,7 +16807,7 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vminq_f32_(a, b)
}

/// Floating-point Minimun Number (vector)
/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
@@ -16824,7 +16824,7 @@ pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
vminnm_f32_(a, b)
}

/// Floating-point Minimun Number (vector)
/// Floating-point Minimum Number (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4 changes: 4 additions & 0 deletions crates/core_arch/src/arm_shared/neon/mod.rs
@@ -4989,6 +4989,10 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(nop))]
#[cfg_attr(
target_arch = "aarch64",
stable(feature = "vget_low_s8", since = "1.60.0")
)]
pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
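With the `stable` attribute added above, `vget_low_s8` becomes callable on AArch64 without a feature gate (from Rust 1.60, per the attribute). A minimal usage sketch, assuming an aarch64 target where NEON is always available:

```rust
#[cfg(target_arch = "aarch64")]
fn example() {
    use core::arch::aarch64::*;
    unsafe {
        let v: int8x16_t = vdupq_n_s8(7);   // all 16 lanes set to 7
        let low: int8x8_t = vget_low_s8(v); // lanes 0..=7
        assert_eq!(vget_lane_s8::<0>(low), 7);
    }
}
```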
23 changes: 0 additions & 23 deletions crates/intrinsic-test/missing_aarch64.txt
@@ -108,26 +108,3 @@ vqshluq_n_s32
vqshluq_n_s64
vqshluq_n_s8
vqshlus_n_s32

# These tests produce a different result from C but only in debug builds of
# stdarch. This likely both a bug in stdarch (expanding to a different LLVM
# intrinsic) and a bug in LLVM (incorrect optimization changing the behavior of
# integer operations).
vqrdmlah_lane_s16
vqrdmlah_lane_s32
vqrdmlah_laneq_s16
vqrdmlah_laneq_s32
vqrdmlah_s16
vqrdmlah_s32
vqrdmlahh_lane_s16
vqrdmlahh_laneq_s16
vqrdmlahh_s16
vqrdmlahq_lane_s16
vqrdmlahq_lane_s32
vqrdmlahq_laneq_s16
vqrdmlahq_laneq_s32
vqrdmlahq_s16
vqrdmlahq_s32
vqrdmlahs_lane_s32
vqrdmlahs_laneq_s32
vqrdmlahs_s32
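The comment deleted above explains why these tests were excluded: the polyfill could disagree with C. The root cause is that the old expansion saturates twice — once inside `vqrdmulh`, once in `vqadd` — while the hardware instruction saturates only once at the end. A worked boundary case showing the off-by-one (an illustration using the scalar models sketched earlier, with hypothetical inputs; not code from this PR):

```rust
// Rounding doubling multiply-high with its own saturation (vqrdmulh model).
fn sqrdmulh_h(b: i16, c: i16) -> i16 {
    let rounded = 2 * (b as i64) * (c as i64) + (1 << 15);
    (rounded >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

// Single-saturation accumulate (sqrdmlah model).
fn sqrdmlah_h(a: i16, b: i16, c: i16) -> i16 {
    let rounded = ((a as i64) << 16) + 2 * (b as i64) * (c as i64) + (1 << 15);
    (rounded >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

fn main() {
    let (a, b, c) = (-1i16, i16::MIN, i16::MIN);
    // Old polyfill: vqrdmulh saturates 32768 -> 32767, then vqadd adds -1.
    let polyfill = (a as i32 + sqrdmulh_h(b, c) as i32)
        .clamp(i16::MIN as i32, i16::MAX as i32) as i16;
    assert_eq!(polyfill, 32766);
    // Direct instruction: accumulate first, saturate once.
    assert_eq!(sqrdmlah_h(a, b, c), 32767);
}
```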