Skip to content

s390x: yet another batch of intrinsics #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 57 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
9426bb5
feat - FEAT_LUT neon intrinsics
Jamesbarford Mar 3, 2025
58538b1
Don't field-project (`.0`) into SIMD types
scottmcm Mar 5, 2025
eba2217
Update SDE mirror to ci-mirrors.rust-lang.org
sayantn Mar 15, 2025
31c9f7a
wasm32: Fix undefined behavior with shift intrinsics
alexcrichton Mar 3, 2025
d4a26b9
Fix rustfmt
alexcrichton Mar 3, 2025
b7a1daf
Document safety conditions of simd shifts
alexcrichton Mar 10, 2025
5192f30
add `vec_reve`
folkertdev Feb 19, 2025
cefe6c9
add `vec_xl` and `vec_xst`
folkertdev Feb 25, 2025
6d6ca49
add `vec_load_len` and `vec_store_len`
folkertdev Feb 25, 2025
21b00cb
add `vec_load_len_r` and `vec_store_len_r`
folkertdev Feb 25, 2025
bb23a48
add `vec_load_bndry`, `__lcbb` and `vec_load_pair`
folkertdev Feb 25, 2025
4f57e52
add `vec_pack`, `vec_packs` and `vec_packsu`
folkertdev Mar 1, 2025
8870be2
add `vec_packs_cc` and `vec_packsu_cc`
folkertdev Mar 1, 2025
d4ba3d2
add `vec_madd` and `vec_msub`
folkertdev Mar 1, 2025
64ac60b
test `vec_rl`
folkertdev Mar 2, 2025
b5443d6
correct name of signed splat functions
folkertdev Mar 3, 2025
8a1e79e
add `vec_unpackh` and `vec_unpackl`
folkertdev Mar 3, 2025
ef8f79e
add `vec_avg`
folkertdev Mar 3, 2025
0dab415
add `vec_checksum`
folkertdev Mar 3, 2025
ad4d693
add `vec_add_u128`, `vec_addc_u128`, `vec_adde_u128` and `vec_addce_u…
folkertdev Mar 4, 2025
e61e147
add `vec_mule`
folkertdev Mar 4, 2025
ab66d14
add `vec_nmsub`
folkertdev Mar 4, 2025
6956f2a
clarify fixme waiting for a newer llvm version
folkertdev Mar 4, 2025
58fa7a7
add `vec_gfmsum`
folkertdev Mar 4, 2025
75e9c9e
add `vec_gfmsum_128`
folkertdev Mar 4, 2025
eba5adf
add `vec_gfmsum_accum` and `vec_gfmsum_accum_128`
folkertdev Mar 4, 2025
3c1bdfe
add `vec_nmadd`
folkertdev Mar 4, 2025
31be70e
add `vec_gather_element`
folkertdev Mar 4, 2025
7777e5c
add `vec_bperm_u128`
folkertdev Mar 4, 2025
f325359
add `vec_sel`
folkertdev Mar 4, 2025
a9a6405
add `vec_scatter`
folkertdev Mar 4, 2025
25746d1
add `vec_fp_test_data_class`
folkertdev Mar 5, 2025
9006150
add `vec_test_mask`
folkertdev Mar 5, 2025
7914100
add `vec_search_string_cc` and `vec_search_string_until_zero_cc`
folkertdev Mar 5, 2025
a48b504
add `vec_double` and `vec_float`
folkertdev Mar 5, 2025
814f140
add `vec_extend_s64`
folkertdev Mar 5, 2025
cd185a8
add `vec_signed` and `vec_unsigned`
folkertdev Mar 5, 2025
900d502
add `vec_cp_until_zero` and `vec_cp_until_zero_cc`
folkertdev Mar 5, 2025
01c208f
add `vec_msum_u128`
folkertdev Mar 5, 2025
bd478a9
add `vec_sld`, `vec_sldb`, `vec_sldw` and `vec_srdb`
folkertdev Mar 5, 2025
747870f
add `vec_cmprg`
folkertdev Mar 6, 2025
6a03b2f
add `vec_cmpnrg`
folkertdev Mar 6, 2025
434d285
add `vec_cmprg_idx` and `vec_cmpnrg_idx`
folkertdev Mar 6, 2025
66b2a46
add `vec_cmprg_cc` and friends
folkertdev Mar 6, 2025
4dbc8af
add `vec_cmprg_or_0_idx` and `vec_cmpnrg_or_0_idx`
folkertdev Mar 6, 2025
83d5d2a
add `vec_cmprg_or_0_idx_cc` and `vec_cmpnrg_or_0_idx_cc`
folkertdev Mar 6, 2025
e67cadf
let's not use `&mut` until we get confirmation it's OK
folkertdev Mar 6, 2025
c872f5b
add `vec_cmpgt`, `vec_cmplt`, `vec_cmpge`, `vec_cmple`
folkertdev Mar 7, 2025
e8508b5
add `vec_cmpeq` and `vec_cmpne`
folkertdev Mar 7, 2025
9e19b0e
add `vec_cmpeq_idx` and variations
folkertdev Mar 7, 2025
551ecd0
add `vec_all_nan`, `vec_any_nan`, `vec_all_numeric` and `vec_any_nume…
folkertdev Mar 8, 2025
db1d9ca
add `vec_any_*` and `vec_all_*`
folkertdev Mar 8, 2025
11d56df
add `vec_mulo`
folkertdev Mar 8, 2025
1070ae8
add `vec_mulh`
folkertdev Mar 8, 2025
052dd81
add `vec_meadd`, `vec_moadd`, `vec_mhadd` and `vec_mladd`
folkertdev Mar 8, 2025
049b750
shrink the size of type signatures
folkertdev Mar 16, 2025
5e2f119
move unsafe pointer writes to the surface
folkertdev Mar 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
xz-utils

RUN wget https://downloadmirror.intel.com/843185/sde-external-9.48.0-2024-11-25-lin.tar.xz
RUN tar -xJf sde-external-9.48.0-2024-11-25-lin.tar.xz
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-9.48.0-2024-11-25-lin/sde64 \
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz
RUN mkdir intel-sde
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
-cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def \
-rtm-mode full -tsx --"
397 changes: 397 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion crates/core_arch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
x86_amx_intrinsics,
f16,
keylocker_x86,
aarch64_unstable_target_feature
aarch64_unstable_target_feature,
bigint_helper_methods
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]
Expand Down
2 changes: 1 addition & 1 deletion crates/core_arch/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ macro_rules! types {
impl crate::fmt::Debug for $name {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.0)
crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
}
}
)*);
Expand Down
19 changes: 19 additions & 0 deletions crates/core_arch/src/s390x/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,19 @@ macro_rules! l_t_t {
u8
};

(vector_bool_long_long ) => {
u64
};
(vector_bool_int ) => {
u32
};
(vector_bool_short ) => {
u16
};
(vector_bool_char ) => {
u8
};

(vector_float) => {
f32
};
Expand Down Expand Up @@ -338,6 +351,9 @@ macro_rules! t_u {
(vector_bool_int) => {
vector_unsigned_int
};
(vector_bool_long_long) => {
vector_unsigned_long_long
};
(vector_unsigned_char) => {
vector_unsigned_char
};
Expand Down Expand Up @@ -380,6 +396,9 @@ macro_rules! t_b {
(vector_bool_int) => {
vector_bool_int
};
(vector_bool_long_long) => {
vector_bool_long_long
};
(vector_signed_char) => {
vector_bool_char
};
Expand Down
4,916 changes: 4,341 additions & 575 deletions crates/core_arch/src/s390x/vector.rs

Large diffs are not rendered by default.

63 changes: 53 additions & 10 deletions crates/core_arch/src/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
macro_rules! simd_ty {
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
#[derive(Copy, Clone)]
pub(crate) struct $id([$elem_type; $len]);

#[allow(clippy::use_self)]
Expand Down Expand Up @@ -38,13 +38,32 @@ macro_rules! simd_ty {
/// Use for testing only.
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) fn extract(self, index: usize) -> $elem_type {
assert!(index < $len);
// Now that we know this is in-bounds, use pointer arithmetic to access the right element.
let self_ptr = &self as *const Self as *const $elem_type;
unsafe {
self_ptr.add(index).read()
}
pub(crate) fn extract(&self, index: usize) -> $elem_type {
self.as_array()[index]
}

/// Reinterprets `self` as a shared reference to `[$elem_type; $len]`.
///
/// No data is copied: the returned reference is obtained by casting a
/// pointer to `self` to a pointer to the array type and borrowing through it.
#[inline]
pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
// `&Self` coerces to `*const Self`; `cast` then only changes the pointee type.
let simd_ptr: *const Self = self;
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}

/// Equality for `$id`, defined by comparing the `as_array()` views of both operands.
impl core::cmp::PartialEq for $id {
#[inline]
fn eq(&self, other: &Self) -> bool {
// Delegates to array equality on the `[$elem_type; $len]` views.
self.as_array() == other.as_array()
}
}

/// `Debug` output for `$id` is produced by `debug_simd_finish`, which is given
/// the stringified type name and the `as_array()` view of the value.
impl core::fmt::Debug for $id {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// `stringify!($id)` yields the type name as a string literal at expansion time.
debug_simd_finish(f, stringify!($id), self.as_array())
}
}
}
Expand All @@ -53,7 +72,7 @@ macro_rules! simd_ty {
macro_rules! simd_m_ty {
($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
#[derive(Copy, Clone)]
pub(crate) struct $id([$elem_type; $len]);

#[allow(clippy::use_self)]
Expand All @@ -79,6 +98,30 @@ macro_rules! simd_m_ty {
// a simd type with exactly one element.
unsafe { simd_shuffle!(one, one, [0; $len]) }
}

/// Views `self` as `&[$elem_type; $len]` without copying.
///
/// The reference is created by casting a pointer to `self` to a pointer to
/// the array type and dereferencing it.
#[inline]
pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
// Coerce the reference to a raw pointer, then retarget it at the array type.
let simd_ptr: *const Self = self;
let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
// SAFETY: We can always read the prefix of a simd type as an array.
// There might be more padding afterwards for some widths, but
// that's not a problem for reading less than that.
unsafe { &*array_ptr }
}
}

/// Two `$id` values compare equal exactly when their `as_array()` views compare equal.
impl core::cmp::PartialEq for $id {
#[inline]
fn eq(&self, other: &Self) -> bool {
// Compare through the array views of both operands.
self.as_array() == other.as_array()
}
}

/// Formats `$id` by handing the stringified type name and the `as_array()`
/// view of the value to `debug_simd_finish`.
impl core::fmt::Debug for $id {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// `stringify!($id)` yields the type name as a string literal at expansion time.
debug_simd_finish(f, stringify!($id), self.as_array())
}
}
}
}
Expand Down Expand Up @@ -968,7 +1011,7 @@ simd_ty!(
pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
formatter: &mut crate::fmt::Formatter<'_>,
type_name: &str,
array: [T; N],
array: &[T; N],
) -> crate::fmt::Result {
crate::fmt::Formatter::debug_tuple_fields_finish(
formatter,
Expand Down
64 changes: 52 additions & 12 deletions crates/core_arch/src/wasm32/simd128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2318,7 +2318,25 @@ pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
#[doc(alias("i8x16.shl"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i8x16_shl(a: v128, amt: u32) -> v128 {
unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
// SAFETY: the safety of this intrinsic relies on the fact that the
// shift amount for each lane is less than the number of bits in the input
// lane. In this case the input has 8-bit lanes but the shift amount above
// is `u32`, so a mask is required to discard all the upper bits of `amt` to
// ensure that the safety condition is met.
//
// Note that this is distinct from the behavior of the native WebAssembly
// instruction here where WebAssembly defines this instruction as performing
// a mask as well. This is nonetheless required since this must have defined
// semantics in LLVM, not just WebAssembly.
//
// Finally note that this mask operation is not actually emitted into the
// final binary itself. LLVM understands that the wasm operation implicitly
// masks, so it knows this mask operation is redundant.
//
// Basically the extra mask here is required as a bridge from the documented
// semantics through LLVM back out to WebAssembly. Both ends have the
// documented semantics, and the mask is required by LLVM in the middle.
unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
}

#[stable(feature = "wasm_simd", since = "1.54.0")]
Expand All @@ -2335,7 +2353,9 @@ pub use i8x16_shl as u8x16_shl;
#[doc(alias("i8x16.shr_s"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
}

/// Shifts each lane to the right by the specified number of bits, shifting in
Expand All @@ -2349,7 +2369,9 @@ pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
#[doc(alias("i8x16.shr_u"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn u8x16_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat(amt as u8)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() }
}

/// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers.
Expand Down Expand Up @@ -2686,7 +2708,9 @@ pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16;
#[doc(alias("i16x8.shl"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i16x8_shl(a: v128, amt: u32) -> v128 {
unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
}

#[stable(feature = "wasm_simd", since = "1.54.0")]
Expand All @@ -2703,7 +2727,9 @@ pub use i16x8_shl as u16x8_shl;
#[doc(alias("i16x8.shr_s"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
}

/// Shifts each lane to the right by the specified number of bits, shifting in
Expand All @@ -2717,7 +2743,9 @@ pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
#[doc(alias("i16x8.shr_u"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn u16x8_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat(amt as u16)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() }
}

/// Adds two 128-bit vectors as if they were two packed eight 16-bit integers.
Expand Down Expand Up @@ -3136,7 +3164,9 @@ pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8;
#[doc(alias("i32x4.shl"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i32x4_shl(a: v128, amt: u32) -> v128 {
unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
}

#[stable(feature = "wasm_simd", since = "1.54.0")]
Expand All @@ -3153,7 +3183,9 @@ pub use i32x4_shl as u32x4_shl;
#[doc(alias("i32x4.shr_s"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
}

/// Shifts each lane to the right by the specified number of bits, shifting in
Expand All @@ -3167,7 +3199,9 @@ pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
#[doc(alias("i32x4.shr_u"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn u32x4_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() }
}

/// Adds two 128-bit vectors as if they were two packed four 32-bit integers.
Expand Down Expand Up @@ -3502,7 +3536,9 @@ pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4;
#[doc(alias("i64x2.shl"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i64x2_shl(a: v128, amt: u32) -> v128 {
unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
}

#[stable(feature = "wasm_simd", since = "1.54.0")]
Expand All @@ -3519,7 +3555,9 @@ pub use i64x2_shl as u64x2_shl;
#[doc(alias("i64x2.shr_s"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
}

/// Shifts each lane to the right by the specified number of bits, shifting in
Expand All @@ -3533,7 +3571,9 @@ pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
#[doc(alias("i64x2.shr_u"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn u64x2_shr(a: v128, amt: u32) -> v128 {
unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat(amt as u64)).v128() }
// SAFETY: see i8x16_shl for more documentation why this is unsafe,
// essentially the shift amount must be valid hence the mask.
unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() }
}

/// Adds two 128-bit vectors as if they were two packed two 64-bit integers.
Expand Down
26 changes: 26 additions & 0 deletions crates/intrinsic-test/missing_aarch64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,32 @@ vrnd32x_f64
vrnd32z_f64
vrnd64x_f64
vrnd64z_f64
vluti2_lane_p16
vluti2_lane_p8
vluti2_lane_s16
vluti2_lane_s8
vluti2_lane_u16
vluti2_lane_u8
vluti2q_lane_p16
vluti2q_lane_p8
vluti2q_lane_s16
vluti2q_lane_s8
vluti2q_lane_u16
vluti2q_lane_u8
vluti4q_lane_f16_x2
vluti4q_lane_p16_x2
vluti4q_lane_p8
vluti4q_lane_s16_x2
vluti4q_lane_s8
vluti4q_lane_u16_x2
vluti4q_lane_u8
vluti4q_laneq_f16_x2
vluti4q_laneq_p16_x2
vluti4q_laneq_p8
vluti4q_laneq_s16_x2
vluti4q_laneq_s8
vluti4q_laneq_u16_x2
vluti4q_laneq_u8

# Broken in Clang
vcvth_s16_f16
Expand Down
2 changes: 1 addition & 1 deletion crates/intrinsic-test/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ fn compile_c(
let arch_flags = if target.contains("v7") {
"-march=armv8.6-a+crypto+crc+dotprod+fp16"
} else {
"-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax"
"-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax+lut"
};

let intrinsic_name = &intrinsic.name;
Expand Down
Loading