From 9426bb56586c6ae4095a2dcbd66c570253e6fb32 Mon Sep 17 00:00:00 2001
From: James Barford-Evans <james.barford-evans@arm.com>
Date: Mon, 3 Mar 2025 08:38:16 +0000
Subject: [PATCH 01/57] feat - FEAT_LUT neon intrinsics

---
 .../core_arch/src/aarch64/neon/generated.rs   | 397 ++++++++
 crates/intrinsic-test/missing_aarch64.txt     |  26 +
 crates/intrinsic-test/src/main.rs             |   2 +-
 .../spec/neon/aarch64.spec.yml                | 272 ++++++
 intrinsics_data/arm_intrinsics.json           | 910 ++++++++++++++++++
 5 files changed, 1606 insertions(+), 1 deletion(-)
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 485aa80cfe..56e522a316 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12826,6 +12826,403 @@ pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
     ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
     ret_val
 }
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_s8<const LANE: i32>(a: int8x8_t, b: uint8x8_t) -> int8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v8i8"
+        )]
+        fn _vluti2_lane_s8(a: int8x8_t, b: uint8x8_t, n: i32) -> int8x16_t;
+    }
+    _vluti2_lane_s8(a, b, LANE)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v16i8"
+        )]
+        fn _vluti2q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t;
+    }
+    _vluti2q_lane_s8(a, b, LANE)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_s16<const LANE: i32>(a: int16x4_t, b: uint8x8_t) -> int16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v4i16"
+        )]
+        fn _vluti2_lane_s16(a: int16x4_t, b: uint8x8_t, n: i32) -> int16x8_t;
+    }
+    _vluti2_lane_s16(a, b, LANE)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_s16<const LANE: i32>(a: int16x8_t, b: uint8x8_t) -> int16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v8i16"
+        )]
+        fn _vluti2q_lane_s16(a: int16x8_t, b: uint8x8_t, n: i32) -> int16x8_t;
+    }
+    _vluti2q_lane_s16(a, b, LANE)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_u8<const LANE: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti2_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti2q_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint8x8_t) -> uint16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti2_lane_s16::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti2q_lane_s16::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_p8<const LANE: i32>(a: poly8x8_t, b: uint8x8_t) -> poly8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti2_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti2q_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_p16<const LANE: i32>(a: poly16x4_t, b: uint8x8_t) -> poly16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti2_lane_s16::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_p16<const LANE: i32>(a: poly16x8_t, b: uint8x8_t) -> poly16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti2q_lane_s16::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut,fp16")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_f16_x2<const LANE: i32>(a: float16x8x2_t, b: uint8x8_t) -> float16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x8_t) -> uint16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x8_t) -> poly16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x8_t) -> int16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti4q.lane.x2.v8i16"
+        )]
+        fn _vluti4q_lane_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x8_t, n: i32) -> int16x8_t;
+    }
+    _vluti4q_lane_s16_x2(a.0, a.1, b, LANE)
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti4q.lane.v16i8"
+        )]
+        fn _vluti4q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t;
+    }
+    _vluti4q_lane_s8(a, b, LANE)
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t {
+    static_assert!(LANE == 0);
+    transmute(vluti4q_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t {
+    static_assert!(LANE == 0);
+    transmute(vluti4q_lane_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut,fp16")]
+#[cfg_attr(test, assert_instr(nop, LANE = 3))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_f16_x2<const LANE: i32>(
+    a: float16x8x2_t,
+    b: uint8x16_t,
+) -> float16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 3))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x16_t) -> uint16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 3))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x16_t) -> poly16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 3))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x16_t) -> int16x8_t {
+    static_assert!(LANE >= 0 && LANE <= 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti4q.laneq.x2.v8i16"
+        )]
+        fn _vluti4q_laneq_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x16_t, n: i32) -> int16x8_t;
+    }
+    _vluti4q_laneq_s16_x2(a.0, a.1, b, LANE)
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_s8<const LANE: i32>(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti4q.laneq.v16i8"
+        )]
+        fn _vluti4q_laneq_s8(a: int8x16_t, b: uint8x16_t, n: i32) -> int8x16_t;
+    }
+    _vluti4q_laneq_s8(a, b, LANE)
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_u8<const LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b))
+}
+#[doc = "Lookup table read with 4-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti4q_laneq_p8<const LANE: i32>(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
+    static_assert!(LANE >= 0 && LANE <= 1);
+    transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b))
+}
 #[doc = "Maximum (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"]
 #[inline]
diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt
index 2f625d3e21..bbcfc40c69 100644
--- a/crates/intrinsic-test/missing_aarch64.txt
+++ b/crates/intrinsic-test/missing_aarch64.txt
@@ -30,6 +30,32 @@ vrnd32x_f64
 vrnd32z_f64
 vrnd64x_f64
 vrnd64z_f64
+vluti2_lane_p16
+vluti2_lane_p8
+vluti2_lane_s16
+vluti2_lane_s8
+vluti2_lane_u16
+vluti2_lane_u8
+vluti2q_lane_p16
+vluti2q_lane_p8
+vluti2q_lane_s16
+vluti2q_lane_s8
+vluti2q_lane_u16
+vluti2q_lane_u8
+vluti4q_lane_f16_x2
+vluti4q_lane_p16_x2
+vluti4q_lane_p8
+vluti4q_lane_s16_x2
+vluti4q_lane_s8
+vluti4q_lane_u16_x2
+vluti4q_lane_u8
+vluti4q_laneq_f16_x2
+vluti4q_laneq_p16_x2
+vluti4q_laneq_p8
+vluti4q_laneq_s16_x2
+vluti4q_laneq_s8
+vluti4q_laneq_u16_x2
+vluti4q_laneq_u8
 
 # Broken in Clang
 vcvth_s16_f16
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index 0fa0268053..2d04f1cf34 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -240,7 +240,7 @@ fn compile_c(
     let arch_flags = if target.contains("v7") {
         "-march=armv8.6-a+crypto+crc+dotprod+fp16"
     } else {
-        "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax"
+        "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax+lut"
     };
 
     let intrinsic_name = &intrinsic.name;
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 1356c609fe..41fdfc5382 100644
--- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -89,6 +89,10 @@ aarch64-crc-stable: &aarch64-crc-stable
 neon-unstable-f16: &neon-unstable-f16
   FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']]
 
+# #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+neon-unstable-feat-lut: &neon-unstable-feat-lut
+  FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]
+
 intrinsics:
   - name: "vaddd_{type}"
     doc: Add
@@ -14013,3 +14017,271 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.famin.{neon_type}"
               arch: aarch64,arm64ec
+
+  - name: "vluti2{neon_type[0].lane_nox}"
+    doc: "Lookup table read with 2-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t,   uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"']
+      - [int8x16_t,  uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"']
+      - [int16x4_t,  uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"']
+      - [int16x8_t,  uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - LLVMLink:
+          name: "vluti2{neon_type[0].lane_nox}"
+          arguments:
+            - 'a: {neon_type[0]}'
+            - 'b: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti2.lane.{neon_type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]]
+
+  - name: "vluti2{neon_type[0].lane_nox}"
+    doc: "Lookup table read with 2-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,lut"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t,  uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
+      - [poly8x8_t,  uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
+      - [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
+      - [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
+      - [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
+      - [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
+      - [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti2{neon_type[4].lane_nox}::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - b
+
+  - name: "vluti4{neon_type[0].lane_nox}"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[3]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t,  uint8x8_t, 'LANE == 0', '"neon,lut"']
+    compose:
+      - FnCall: ['static_assert!', ['{type[2]}']]
+      - LLVMLink:
+          name: "vluti4{neon_type[0].lane_nox}"
+          arguments:
+            - 'a: {neon_type[0]}'
+            - 'b: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti4q.lane.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti4{neon_type[0].lane_nox}', [a, b, LANE]]
+
+  - name: "vluti4{neon_type[0].lane_nox}"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,lut"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, uint8x8_t, 'LANE == 0', int8x16_t]
+      - [poly8x16_t, uint8x8_t, 'LANE == 0', int8x16_t]
+    compose:
+      - FnCall: ['static_assert!', ['{type[2]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti4{neon_type[3].lane_nox}::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - b
+
+  - name: "vluti4{neon_type[0].laneq_nox}"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,lut"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t,  uint8x16_t, 'LANE >= 0 && LANE <= 1']
+    compose:
+      - FnCall: ['static_assert!', ['{type[2]}']]
+      - LLVMLink:
+          name: "vluti4{neon_type[0].laneq_nox}"
+          arguments:
+            - 'a: {neon_type[0]}'
+            - 'b: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti4q.laneq.{neon_type[1]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti4{neon_type[0].laneq_nox}', [a, b, LANE]]
+
+  - name: "vluti4{neon_type[0].laneq_nox}"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,lut"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t]
+      - [poly8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t]
+    compose:
+      - FnCall: ['static_assert!', ['{type[2]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti4{neon_type[3].laneq_nox}::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - b
+
+  - name: "vluti4q_lane_{neon_type[0]}_x2"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8x2_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - LLVMLink:
+          name: "vluti4q_lane_{neon_type[0]}_x2"
+          arguments:
+            - 'a: {neon_type[2]}'
+            - 'b: {neon_type[2]}'
+            - 'c: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti4q.lane.x2.{neon_type[2]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti4q_lane_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]]
+
+  - name: "vluti4q_lane_{neon_type[0]}_x2"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8x2_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t]
+      - [poly16x8x2_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t]
+      - [float16x8x2_t, uint8x8_t, float16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut,fp16"', int16x8x2_t]
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti4q_lane_{neon_type[5]}_x2::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - b
+
+  - name: "vluti4q_laneq_{neon_type[0]}_x2"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8x2_t, uint8x16_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - LLVMLink:
+          name: "vluti4q_laneq_{neon_type[0]}_x2"
+          arguments:
+            - 'a: {neon_type[2]}'
+            - 'b: {neon_type[2]}'
+            - 'c: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti4q.laneq.x2.{neon_type[2]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti4q_laneq_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]]
+
+  - name: "vluti4q_laneq_{neon_type[0]}_x2"
+    doc: "Lookup table read with 4-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8x2_t,  uint8x16_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t]
+      - [poly16x8x2_t,  uint8x16_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t]
+      - [float16x8x2_t, uint8x16_t, float16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut,fp16"', int16x8x2_t]
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti4q_laneq_{neon_type[5]}_x2::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - b
diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json
index 32b6120d75..9d58aad49c 100644
--- a/intrinsics_data/arm_intrinsics.json
+++ b/intrinsics_data/arm_intrinsics.json
@@ -118843,5 +118843,915 @@
         "FAMAX"
       ]
     ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_s8",
+    "arguments": [
+      "int8x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_s8",
+    "arguments": [
+      "int8x16_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_s16",
+    "arguments": [
+      "int16x4_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_s16",
+    "arguments": [
+      "int16x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2_lane_p16",
+    "arguments": [
+      "poly16x4_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti2q_lane_p16",
+    "arguments": [
+      "poly16x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_u8",
+    "arguments": [
+      "uint8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_u8",
+    "arguments": [
+      "uint8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_s8",
+    "arguments": [
+      "int8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_s8",
+    "arguments": [
+      "int8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_p8",
+    "arguments": [
+      "poly8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_p8",
+    "arguments": [
+      "poly8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn.16B"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_u16_x2",
+    "arguments": [
+      "uint16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_u16_x2",
+    "arguments": [
+      "uint16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_s16_x2",
+    "arguments": [
+      "int16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_s16_x2",
+    "arguments": [
+      "int16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_f16_x2",
+    "arguments": [
+      "float16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_f16_x2",
+    "arguments": [
+      "float16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_lane_p16_x2",
+    "arguments": [
+      "poly16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vluti4q_laneq_p16_x2",
+    "arguments": [
+      "poly16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vn": {
+        "register": "Vn1.8H"
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LUTI4"
+      ]
+    ]
   }
 ]

From 58538b167ff39bf0d0e5fdd9d2c1869cc0d4433b Mon Sep 17 00:00:00 2001
From: Scott McMurray <scottmcm@users.noreply.github.com>
Date: Tue, 4 Mar 2025 22:25:08 -0800
Subject: [PATCH 02/57] Don't field-project (`.0`) into SIMD types

---
 crates/core_arch/src/macros.rs |  2 +-
 crates/core_arch/src/simd.rs   | 63 ++++++++++++++++++++++++++++------
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs
index f59e278bb7..e00b433536 100644
--- a/crates/core_arch/src/macros.rs
+++ b/crates/core_arch/src/macros.rs
@@ -132,7 +132,7 @@ macro_rules! types {
         impl crate::fmt::Debug for $name {
             #[inline]
             fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
-                crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.0)
+                crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
             }
         }
     )*);
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 29e4b3e7a2..25834943f0 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -5,7 +5,7 @@
 macro_rules! simd_ty {
     ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
         #[repr(simd)]
-        #[derive(Copy, Clone, Debug, PartialEq)]
+        #[derive(Copy, Clone)]
         pub(crate) struct $id([$elem_type; $len]);
 
         #[allow(clippy::use_self)]
@@ -38,13 +38,32 @@ macro_rules! simd_ty {
             /// Use for testing only.
             // FIXME: Workaround rust@60637
             #[inline(always)]
-            pub(crate) fn extract(self, index: usize) -> $elem_type {
-                assert!(index < $len);
-                // Now that we know this is in-bounds, use pointer arithmetic to access the right element.
-                let self_ptr = &self as *const Self as *const $elem_type;
-                unsafe {
-                    self_ptr.add(index).read()
-                }
+            pub(crate) fn extract(&self, index: usize) -> $elem_type {
+                self.as_array()[index]
+            }
+
+            #[inline]
+            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
+                let simd_ptr: *const Self = self;
+                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
+                // SAFETY: We can always read the prefix of a simd type as an array.
+                // There might be more padding afterwards for some widths, but
+                // that's not a problem for reading less than that.
+                unsafe { &*array_ptr }
+            }
+        }
+
+        impl core::cmp::PartialEq for $id {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                self.as_array() == other.as_array()
+            }
+        }
+
+        impl core::fmt::Debug for $id {
+            #[inline]
+            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+                debug_simd_finish(f, stringify!($id), self.as_array())
             }
         }
     }
@@ -53,7 +72,7 @@ macro_rules! simd_ty {
 macro_rules! simd_m_ty {
     ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
         #[repr(simd)]
-        #[derive(Copy, Clone, Debug, PartialEq)]
+        #[derive(Copy, Clone)]
         pub(crate) struct $id([$elem_type; $len]);
 
         #[allow(clippy::use_self)]
@@ -79,6 +98,30 @@ macro_rules! simd_m_ty {
                 // a simd type with exactly one element.
                 unsafe { simd_shuffle!(one, one, [0; $len]) }
             }
+
+            #[inline]
+            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
+                let simd_ptr: *const Self = self;
+                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
+                // SAFETY: We can always read the prefix of a simd type as an array.
+                // There might be more padding afterwards for some widths, but
+                // that's not a problem for reading less than that.
+                unsafe { &*array_ptr }
+            }
+        }
+
+        impl core::cmp::PartialEq for $id {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                self.as_array() == other.as_array()
+            }
+        }
+
+        impl core::fmt::Debug for $id {
+            #[inline]
+            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+                debug_simd_finish(f, stringify!($id), self.as_array())
+            }
         }
     }
 }
@@ -968,7 +1011,7 @@ simd_ty!(
 pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
     formatter: &mut crate::fmt::Formatter<'_>,
     type_name: &str,
-    array: [T; N],
+    array: &[T; N],
 ) -> crate::fmt::Result {
     crate::fmt::Formatter::debug_tuple_fields_finish(
         formatter,

From eba22171d4c1bb043fca7579820fbec4a18d32f0 Mon Sep 17 00:00:00 2001
From: Sayantan Chakraborty <142906350+sayantn@users.noreply.github.com>
Date: Sat, 15 Mar 2025 22:36:18 +0530
Subject: [PATCH 03/57] Update SDE mirror to ci-mirrors.rust-lang.org

This is to combat the spurious CI failures in the emulated run. It also helps with updatability and compatibility: it will keep working even if Intel changes the link.
---
 ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
index 70ec5feecd..fe5fe474ca 100644
--- a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
@@ -8,8 +8,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   wget \
   xz-utils
 
-RUN wget https://downloadmirror.intel.com/843185/sde-external-9.48.0-2024-11-25-lin.tar.xz
-RUN tar -xJf sde-external-9.48.0-2024-11-25-lin.tar.xz
-ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-9.48.0-2024-11-25-lin/sde64 \
+RUN wget https://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz
+RUN mkdir intel-sde
+RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
+ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
             -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def \
             -rtm-mode full -tsx --"

From 31c9f7a4ac7cadd22642d520d102e190beda971d Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 3 Mar 2025 07:41:50 -0800
Subject: [PATCH 04/57] wasm32: Fix undefined behavior with shift intrinsics

This commit fixes an issue where simd shift intrinsics in LLVM are
undefined behavior if the shift amount is greater than or equal to the
bit width of the lane. While in WebAssembly the corresponding instructions
are defined as masking out the upper bits, we need to represent that
explicitly in LLVM IR to ensure that the semantics remain defined.

cc rust-lang/rust#137941
---
 crates/core_arch/src/wasm32/simd128.rs | 28 ++++++++++++--------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index a774b68cfd..8812245b79 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -2318,7 +2318,7 @@ pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
 #[doc(alias("i8x16.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_shl(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
+    unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -2335,7 +2335,7 @@ pub use i8x16_shl as u8x16_shl;
 #[doc(alias("i8x16.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() }
+    unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
 }
 
 /// Shifts each lane to the right by the specified number of bits, shifting in
@@ -2349,7 +2349,7 @@ pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i8x16.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u8x16_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat(amt as u8)).v128() }
+    unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() }
 }
 
 /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers.
@@ -2686,7 +2686,7 @@ pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16;
 #[doc(alias("i16x8.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_shl(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() }
+    unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -2703,7 +2703,7 @@ pub use i16x8_shl as u16x8_shl;
 #[doc(alias("i16x8.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() }
+    unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
 }
 
 /// Shifts each lane to the right by the specified number of bits, shifting in
@@ -2717,7 +2717,7 @@ pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i16x8.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u16x8_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat(amt as u16)).v128() }
+    unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() }
 }
 
 /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers.
@@ -3136,7 +3136,7 @@ pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8;
 #[doc(alias("i32x4.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i32x4_shl(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
+    unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -3153,7 +3153,7 @@ pub use i32x4_shl as u32x4_shl;
 #[doc(alias("i32x4.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() }
+    unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
 }
 
 /// Shifts each lane to the right by the specified number of bits, shifting in
@@ -3167,7 +3167,7 @@ pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i32x4.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u32x4_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt)).v128() }
+    unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() }
 }
 
 /// Adds two 128-bit vectors as if they were two packed four 32-bit integers.
@@ -3502,7 +3502,7 @@ pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4;
 #[doc(alias("i64x2.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i64x2_shl(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() }
+    unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
 }
 
 #[stable(feature = "wasm_simd", since = "1.54.0")]
@@ -3519,7 +3519,7 @@ pub use i64x2_shl as u64x2_shl;
 #[doc(alias("i64x2.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() }
+    unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
 }
 
 /// Shifts each lane to the right by the specified number of bits, shifting in
@@ -3533,7 +3533,7 @@ pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i64x2.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u64x2_shr(a: v128, amt: u32) -> v128 {
-    unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat(amt as u64)).v128() }
+    unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() }
 }
 
 /// Adds two 128-bit vectors as if they were two packed two 64-bit integers.
@@ -4344,9 +4344,7 @@ mod tests {
         };
         assert_eq!(
             bytes,
-            [
-                -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
-            ]
+            [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16]
         );
     }
 

From d4a26b91a8ef07f4bec2ff089c01b82c70940481 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 3 Mar 2025 07:44:53 -0800
Subject: [PATCH 05/57] Fix rustfmt

---
 crates/core_arch/src/wasm32/simd128.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index 8812245b79..83214df4fe 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -4344,7 +4344,9 @@ mod tests {
         };
         assert_eq!(
             bytes,
-            [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16]
+            [
+                -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+            ]
         );
     }
 

From b7a1daf6128e0fa8a519bb6f5dae6d2374a6277c Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 10 Mar 2025 12:02:57 -0700
Subject: [PATCH 06/57] Document safety conditions of simd shifts

---
 crates/core_arch/src/wasm32/simd128.rs | 40 ++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index 83214df4fe..1a1e7dc780 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -2318,6 +2318,24 @@ pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
 #[doc(alias("i8x16.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_shl(a: v128, amt: u32) -> v128 {
+    // SAFETY: the safety of this intrinsic relies on the fact that the
+    // shift amount for each lane is less than the number of bits in the input
+    // lane. In this case the input has 8-bit lanes but the shift amount above
+    // is `u32`, so a mask is required to discard all the upper bits of `amt` to
+    // ensure that the safety condition is met.
+    //
+    // Note that this is distinct from the behavior of the native WebAssembly
+    // instruction here where WebAssembly defines this instruction as performing
+    // a mask as well. This is nonetheless required since this must have defined
+    // semantics in LLVM, not just WebAssembly.
+    //
+    // Finally note that this mask operation is not actually emitted into the
+    // final binary itself. LLVM understands that the wasm operation implicitly
+    // masks, so it knows this mask operation is redundant.
+    //
+    // Basically the extra mask here is required as a bridge from the documented
+    // semantics through LLVM back out to WebAssembly. Both ends have the
+    // documented semantics, and the mask is required by LLVM in the middle.
     unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
 }
 
@@ -2335,6 +2353,8 @@ pub use i8x16_shl as u8x16_shl;
 #[doc(alias("i8x16.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() }
 }
 
@@ -2349,6 +2369,8 @@ pub fn i8x16_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i8x16.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u8x16_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() }
 }
 
@@ -2686,6 +2708,8 @@ pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16;
 #[doc(alias("i16x8.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_shl(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
 }
 
@@ -2703,6 +2727,8 @@ pub use i16x8_shl as u16x8_shl;
 #[doc(alias("i16x8.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() }
 }
 
@@ -2717,6 +2743,8 @@ pub fn i16x8_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i16x8.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u16x8_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() }
 }
 
@@ -3136,6 +3164,8 @@ pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8;
 #[doc(alias("i32x4.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i32x4_shl(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
 }
 
@@ -3153,6 +3183,8 @@ pub use i32x4_shl as u32x4_shl;
 #[doc(alias("i32x4.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() }
 }
 
@@ -3167,6 +3199,8 @@ pub fn i32x4_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i32x4.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u32x4_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() }
 }
 
@@ -3502,6 +3536,8 @@ pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4;
 #[doc(alias("i64x2.shl"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i64x2_shl(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
 }
 
@@ -3519,6 +3555,8 @@ pub use i64x2_shl as u64x2_shl;
 #[doc(alias("i64x2.shr_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() }
 }
 
@@ -3533,6 +3571,8 @@ pub fn i64x2_shr(a: v128, amt: u32) -> v128 {
 #[doc(alias("i64x2.shr_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u64x2_shr(a: v128, amt: u32) -> v128 {
+    // SAFETY: see i8x16_shl for more documentation why this is unsafe,
+    // essentially the shift amount must be valid hence the mask.
     unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() }
 }
 

From 5192f30cf73f8adf2bd7b24e7feadbc0d62e360d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 19 Feb 2025 23:16:37 +0100
Subject: [PATCH 07/57] add `vec_reve`

---
 crates/core_arch/src/s390x/vector.rs | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 795e426384..db0d228bca 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -1212,6 +1212,21 @@ mod sealed {
         unsafe fn vec_reve(self) -> Self;
     }
 
+    #[repr(simd)]
+    struct ReverseMask<const N: usize>([u32; N]);
+
+    impl<const N: usize> ReverseMask<N> {
+        const fn new() -> Self {
+            let mut index = [0; N];
+            let mut i = 0;
+            while i < N {
+                index[i] = (N - i - 1) as u32;
+                i += 1;
+            }
+            ReverseMask(index)
+        }
+    }
+
     macro_rules! impl_reve {
         ($($ty:ident, $fun:ident, $instr:ident),*) => {
             $(

From cefe6c9fa3e7564a296a41ba478fd13243c16f88 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 25 Feb 2025 21:43:11 +0100
Subject: [PATCH 08/57] add `vec_xl` and `vec_xst`

---
 crates/core_arch/src/s390x/vector.rs | 148 +++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index db0d228bca..af2d7820f6 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -1855,6 +1855,64 @@ mod sealed {
     }
 
     impl_vfae! { [idx_cc VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc] NeIdx vfaezbs vfaezhs vfaezfs }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vl))]
+    unsafe fn test_vector_load(offset: isize, ptr: *const i32) -> vector_signed_int {
+        ptr.byte_offset(offset)
+            .cast::<vector_signed_int>()
+            .read_unaligned()
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vst))]
+    unsafe fn test_vector_store(vector: vector_signed_int, offset: isize, ptr: *mut i32) {
+        ptr.byte_offset(offset)
+            .cast::<vector_signed_int>()
+            .write_unaligned(vector)
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorLoad: Sized {
+        type ElementType;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_xl(offset: isize, ptr: *const Self::ElementType) -> Self {
+            ptr.byte_offset(offset).cast::<Self>().read_unaligned()
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorStore: Sized {
+        type ElementType;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_xst(self, offset: isize, ptr: *mut Self::ElementType) {
+            ptr.byte_offset(offset).cast::<Self>().write_unaligned(self)
+        }
+    }
+
+    macro_rules! impl_load_store {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorLoad for t_t_l!($ty) {
+                    type ElementType = $ty;
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorStore for t_t_l!($ty) {
+                    type ElementType = $ty;
+                }
+            )*
+        }
+    }
+
+    impl_load_store! { i8 u8 i16 u16 i32 u32 i64 u64 f32 f64 }
 }
 
 /// Vector element-wise addition.
@@ -2690,6 +2748,22 @@ vec_find_any_cc! {
     VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc
 }
 
+/// Vector Load
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xl<T: sealed::VectorLoad>(offset: isize, ptr: *const T::ElementType) -> T {
+    T::vec_xl(offset, ptr)
+}
+
+/// Vector Store
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xst<T: sealed::VectorStore>(vector: T, offset: isize, ptr: *mut T::ElementType) {
+    vector.vec_xst(offset, ptr)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -3393,4 +3467,78 @@ mod tests {
         assert_eq!(c, 3);
         assert_eq!(d.as_array(), &[0, 16, 0, 0]);
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vector_load() {
+        let expected = [0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD];
+
+        let source: [u32; 8] = [
+            0xAAAA_AAAA,
+            0xBBBB_BBBB,
+            0xCCCC_CCCC,
+            0xDDDD_DDDD,
+            0,
+            0,
+            0,
+            0,
+        ];
+        assert_eq!(
+            unsafe { vec_xl::<vector_unsigned_int>(0, source.as_ptr()) }.as_array(),
+            &expected
+        );
+
+        // offset is in bytes
+        let source: [u32; 8] = [
+            0x0000_AAAA,
+            0xAAAA_BBBB,
+            0xBBBB_CCCC,
+            0xCCCC_DDDD,
+            0xDDDD_0000,
+            0,
+            0,
+            0,
+        ];
+        assert_eq!(
+            unsafe { vec_xl::<vector_unsigned_int>(2, source.as_ptr()) }.as_array(),
+            &expected
+        );
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vector_store() {
+        let vec = vector_unsigned_int([0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]);
+
+        let mut dest = [0u32; 8];
+        unsafe { vec_xst(vec, 0, dest.as_mut_ptr()) };
+        assert_eq!(
+            dest,
+            [
+                0xAAAA_AAAA,
+                0xBBBB_BBBB,
+                0xCCCC_CCCC,
+                0xDDDD_DDDD,
+                0,
+                0,
+                0,
+                0
+            ]
+        );
+
+        // offset is in bytes
+        let mut dest = [0u32; 8];
+        unsafe { vec_xst(vec, 2, dest.as_mut_ptr()) };
+        assert_eq!(
+            dest,
+            [
+                0x0000_AAAA,
+                0xAAAA_BBBB,
+                0xBBBB_CCCC,
+                0xCCCC_DDDD,
+                0xDDDD_0000,
+                0,
+                0,
+                0,
+            ]
+        );
+    }
 }

From 6d6ca491097a7360199a813b35f4e431b54bca71 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 25 Feb 2025 22:16:40 +0100
Subject: [PATCH 09/57] add `vec_load_len` and `vec_store_len`

---
 crates/core_arch/src/s390x/vector.rs | 56 ++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index af2d7820f6..127281f883 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -142,6 +142,9 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vfaezbs"] fn vfaezbs(a: vector_signed_char, b: vector_signed_char, c: i32) -> PackedTuple<vector_signed_char, i32>;
     #[link_name = "llvm.s390.vfaezhs"] fn vfaezhs(a: vector_signed_short, b: vector_signed_short, c: i32) -> PackedTuple<vector_signed_short, i32>;
     #[link_name = "llvm.s390.vfaezfs"] fn vfaezfs(a: vector_signed_int, b: vector_signed_int, c: i32) -> PackedTuple<vector_signed_int, i32>;
+
+    #[link_name = "llvm.s390.vll"] fn vll(a: u32, b: *const u8) -> vector_signed_char;
+    #[link_name = "llvm.s390.vstl"] fn vstl(a: vector_signed_char, b: u32, c: *mut u8);
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -1883,6 +1886,8 @@ mod sealed {
         unsafe fn vec_xl(offset: isize, ptr: *const Self::ElementType) -> Self {
             ptr.byte_offset(offset).cast::<Self>().read_unaligned()
         }
+
+        unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self;
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -1894,6 +1899,8 @@ mod sealed {
         unsafe fn vec_xst(self, offset: isize, ptr: *mut Self::ElementType) {
             ptr.byte_offset(offset).cast::<Self>().write_unaligned(self)
         }
+
+        unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32);
     }
 
     macro_rules! impl_load_store {
@@ -1902,17 +1909,43 @@ mod sealed {
                 #[unstable(feature = "stdarch_s390x", issue = "135681")]
                 impl VectorLoad for t_t_l!($ty) {
                     type ElementType = $ty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self {
+                        transmute(vll( byte_count, ptr.cast(),))
+                    }
                 }
 
                 #[unstable(feature = "stdarch_s390x", issue = "135681")]
                 impl VectorStore for t_t_l!($ty) {
                     type ElementType = $ty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32) {
+                        vstl(transmute(self), byte_count, ptr.cast())
+                    }
                 }
             )*
         }
     }
 
     impl_load_store! { i8 u8 i16 u16 i32 u32 i64 u64 f32 f64 }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vll))]
+    unsafe fn test_vec_load_len(ptr: *const i32, byte_count: u32) -> vector_signed_int {
+        vector_signed_int::vec_load_len(ptr, byte_count)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vst))]
+    unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) {
+        vector.vec_store_len(ptr, byte_count)
+    }
 }
 
 /// Vector element-wise addition.
@@ -2764,6 +2797,29 @@ pub unsafe fn vec_xst<T: sealed::VectorStore>(vector: T, offset: isize, ptr: *mu
     vector.vec_xst(offset, ptr)
 }
 
+/// Vector Load
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_len<T: sealed::VectorLoad>(
+    ptr: *const T::ElementType,
+    byte_count: u32,
+) -> T {
+    T::vec_load_len(ptr, byte_count)
+}
+
+/// Vector Store
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_store_len<T: sealed::VectorStore>(
+    vector: T,
+    ptr: *mut T::ElementType,
+    byte_count: u32,
+) {
+    vector.vec_store_len(ptr, byte_count)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 21b00cbc4d825184beb753a465bd35caa5770d96 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 25 Feb 2025 22:31:35 +0100
Subject: [PATCH 10/57] add `vec_load_len_r` and `vec_store_len_r`

---
 crates/core_arch/src/s390x/vector.rs | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 127281f883..33094032b5 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -145,6 +145,10 @@ unsafe extern "unadjusted" {
 
     #[link_name = "llvm.s390.vll"] fn vll(a: u32, b: *const u8) -> vector_signed_char;
     #[link_name = "llvm.s390.vstl"] fn vstl(a: vector_signed_char, b: u32, c: *mut u8);
+
+    #[link_name = "llvm.s390.vlrl"] fn vlrl(a: u32, b: *const u8) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vstrl"] fn vstrl(a: vector_unsigned_char, b: u32, c: *mut u8);
+
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2820,6 +2824,24 @@ pub unsafe fn vec_store_len<T: sealed::VectorStore>(
     vector.vec_store_len(ptr, byte_count)
 }
 
+/// Vector Load Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vlrlr))]
+pub unsafe fn vec_load_len_r(ptr: *const u8, byte_count: u32) -> vector_unsigned_char {
+    vlrl(byte_count, ptr)
+}
+
+/// Vector Store Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vstrlr))]
+pub unsafe fn vec_store_len_r(vector: vector_unsigned_char, ptr: *mut u8, byte_count: u32) {
+    vstrl(vector, byte_count, ptr)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From bb23a4843d8d626cac9c51b5a4cc316f282801b0 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 25 Feb 2025 23:02:29 +0100
Subject: [PATCH 11/57] add `vec_load_bndry`, `__lcbb` and `vec_load_pair`

---
 crates/core_arch/src/s390x/vector.rs | 105 ++++++++++++++++++++++++++-
 1 file changed, 102 insertions(+), 3 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 33094032b5..7931accf30 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -7,7 +7,7 @@
 
 #![allow(non_camel_case_types)]
 
-use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute};
+use crate::{core_arch::simd::*, intrinsics::simd::*, mem::MaybeUninit, mem::transmute};
 
 #[cfg(test)]
 use stdarch_test::assert_instr;
@@ -149,6 +149,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vlrl"] fn vlrl(a: u32, b: *const u8) -> vector_unsigned_char;
     #[link_name = "llvm.s390.vstrl"] fn vstrl(a: vector_unsigned_char, b: u32, c: *mut u8);
 
+    #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32;
+    #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit<vector_signed_char>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -236,6 +238,16 @@ const fn genmasks(bit_width: u32, a: u8, b: u8) -> u64 {
     ((1u64.wrapping_shl(a as u32 + 1)) - 1) & !((1u64.wrapping_shl(b as u32)) - 1)
 }
 
+const fn validate_block_boundary(block_boundary: u16) -> u32 {
+    assert!(
+        block_boundary.is_power_of_two() && block_boundary >= 64 && block_boundary <= 4096,
+        "block boundary must be a constant power of 2 from 64 to 4096",
+    );
+
+    // the boundary operand is log2(boundary) - 6: 64 is encoded as 0, 128 as 1, ..., 4096 as 6.
+    block_boundary.trailing_zeros() - 6
+}
+
 #[macro_use]
 mod sealed {
     use super::*;
@@ -1892,6 +1904,10 @@ mod sealed {
         }
 
         unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self;
+
+        unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>(
+            ptr: *const Self::ElementType,
+        ) -> MaybeUninit<Self>;
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -1919,6 +1935,13 @@ mod sealed {
                     unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self {
                         transmute(vll( byte_count, ptr.cast(),))
                     }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>(ptr: *const Self::ElementType) -> MaybeUninit<Self> {
+                        transmute(vlbb(ptr.cast(), const { validate_block_boundary(BLOCK_BOUNDARY) }))
+                    }
+
                 }
 
                 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -1944,12 +1967,57 @@ mod sealed {
         vector_signed_int::vec_load_len(ptr, byte_count)
     }
 
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr("vlbb"))]
+    unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> {
+        vector_signed_int::vec_load_bndry::<512>(ptr)
+    }
+
     #[inline]
     #[target_feature(enable = "vector")]
     #[cfg_attr(test, assert_instr(vst))]
     unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) {
         vector.vec_store_len(ptr, byte_count)
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorLoadPair: Sized {
+        type ElementType;
+
+        unsafe fn vec_load_pair(a: Self::ElementType, b: Self::ElementType) -> Self;
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorLoadPair for vector_signed_long_long {
+        type ElementType = i64;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_load_pair(a: i64, b: i64) -> Self {
+            vector_signed_long_long([a, b])
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorLoadPair for vector_unsigned_long_long {
+        type ElementType = u64;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_load_pair(a: u64, b: u64) -> Self {
+            vector_unsigned_long_long([a, b])
+        }
+    }
+}
+
+/// Load Count to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(lcbb, BLOCK_BOUNDARY = 512))]
+unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
+    lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
 }
 
 /// Vector element-wise addition.
@@ -2793,6 +2861,24 @@ pub unsafe fn vec_xl<T: sealed::VectorLoad>(offset: isize, ptr: *const T::Elemen
     T::vec_xl(offset, ptr)
 }
 
+/// Vector Load Pair
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_pair<T: sealed::VectorLoadPair>(a: T::ElementType, b: T::ElementType) -> T {
+    T::vec_load_pair(a, b)
+}
+
+/// Vector Load to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_bndry<T: sealed::VectorLoad, const BLOCK_BOUNDARY: u16>(
+    ptr: *const T::ElementType,
+) -> MaybeUninit<T> {
+    T::vec_load_bndry::<BLOCK_BOUNDARY>(ptr)
+}
+
 /// Vector Store
 #[inline]
 #[target_feature(enable = "vector")]
@@ -2801,7 +2887,7 @@ pub unsafe fn vec_xst<T: sealed::VectorStore>(vector: T, offset: isize, ptr: *mu
     vector.vec_xst(offset, ptr)
 }
 
-/// Vector Load
+/// Vector Load with Length
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -2812,7 +2898,7 @@ pub unsafe fn vec_load_len<T: sealed::VectorLoad>(
     T::vec_load_len(ptr, byte_count)
 }
 
-/// Vector Store
+/// Vector Store with Length
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -3619,4 +3705,17 @@ mod tests {
             ]
         );
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vector_lcbb() {
+        #[repr(align(64))]
+        struct Align64<T>(T);
+
+        static ARRAY: Align64<[u8; 128]> = Align64([0; 128]);
+
+        assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[64..].as_ptr()) }, 16);
+        assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[63..].as_ptr()) }, 1);
+        assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8);
+        assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16);
+    }
 }

From 4f57e5275ca4be9118221ca65642dc2f314fa7dc Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 1 Mar 2025 15:47:37 +0100
Subject: [PATCH 12/57] add `vec_pack`, `vec_packs` and `vec_packsu`

---
 crates/core_arch/src/s390x/vector.rs | 182 +++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 7931accf30..2d5e696b04 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -151,6 +151,14 @@ unsafe extern "unadjusted" {
 
     #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32;
     #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit<vector_signed_char>;
+
+    #[link_name = "llvm.s390.vpksh"] fn vpksh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char;
+    #[link_name = "llvm.s390.vpksf"] fn vpksf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short;
+    #[link_name = "llvm.s390.vpksg"] fn vpksg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_int;
+
+    #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -203,6 +211,19 @@ impl<const N: usize> ShuffleMask<N> {
         }
         ShuffleMask(mask)
     }
+
+    const fn pack() -> Self {
+        let mut mask = [0; N];
+        let mut i = 1;
+        let mut index = 0;
+        while index < N {
+            mask[index] = i as u32;
+
+            i += 2;
+            index += 1;
+        }
+        ShuffleMask(mask)
+    }
 }
 
 const fn genmask<const MASK: u16>() -> [u8; 16] {
@@ -2009,6 +2030,114 @@ mod sealed {
             vector_unsigned_long_long([a, b])
         }
     }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn pack<T, const N: usize>(a: T, b: T) -> T {
+        simd_shuffle(a, b, const { ShuffleMask::<N>::pack() })
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpkh))]
+    unsafe fn vpkh(a: i16x8, b: i16x8) -> i8x16 {
+        let a: i8x16 = transmute(a);
+        let b: i8x16 = transmute(b);
+        simd_shuffle(a, b, const { ShuffleMask::<16>::pack() })
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpkf))]
+    unsafe fn vpkf(a: i32x4, b: i32x4) -> i16x8 {
+        let a: i16x8 = transmute(a);
+        let b: i16x8 = transmute(b);
+        simd_shuffle(a, b, const { ShuffleMask::<8>::pack() })
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpkg))]
+    unsafe fn vpkg(a: i64x2, b: i64x2) -> i32x4 {
+        let a: i32x4 = transmute(a);
+        let b: i32x4 = transmute(b);
+        simd_shuffle(a, b, const { ShuffleMask::<4>::pack() })
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPack<Other> {
+        type Result;
+        unsafe fn vec_pack(self, b: Other) -> Self::Result;
+    }
+
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_signed_short, vector_signed_short) -> vector_signed_char }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_bool_short, vector_bool_short) -> vector_bool_char }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_signed_int, vector_signed_int) -> vector_signed_short }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_bool_int, vector_bool_int) -> vector_bool_short }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
+    impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_bool_long_long, vector_bool_long_long) -> vector_bool_int }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacks<Other> {
+        type Result;
+        unsafe fn vec_packs(self, b: Other) -> Self::Result;
+    }
+
+    impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char }
+    impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char }
+    impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short }
+    impl_vec_trait! { [VectorPacks vec_packs] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short }
+    impl_vec_trait! { [VectorPacks vec_packs] vpksg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int }
+    impl_vec_trait! { [VectorPacks vec_packs] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksu<Other> {
+        type Result;
+        unsafe fn vec_packsu(self, b: Other) -> Self::Result;
+    }
+
+    unsafe fn simd_smax<T: Copy>(a: T, b: T) -> T {
+        simd_select::<T, T>(simd_gt::<T, T>(a, b), a, b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpklsh))]
+    unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char {
+        vpklsh(
+            simd_smax(a, vector_signed_short([0; 8])),
+            simd_smax(b, vector_signed_short([0; 8])),
+        )
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpklsf))]
+    unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short {
+        vpklsf(
+            simd_smax(a, vector_signed_int([0; 4])),
+            simd_smax(b, vector_signed_int([0; 4])),
+        )
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vpklsg))]
+    unsafe fn vpacksug(
+        a: vector_signed_long_long,
+        b: vector_signed_long_long,
+    ) -> vector_unsigned_int {
+        vpklsg(
+            simd_smax(a, vector_signed_long_long([0; 2])),
+            simd_smax(b, vector_signed_long_long([0; 2])),
+        )
+    }
+
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuh (vector_signed_short, vector_signed_short) -> vector_unsigned_char }
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char }
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuf (vector_signed_int, vector_signed_int) -> vector_unsigned_short }
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short }
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int }
+    impl_vec_trait! { [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
 }
 
 /// Load Count to Block Boundary
@@ -2544,6 +2673,30 @@ where
     a.vec_mergeh(b)
 }
 
+/// Vector Pack
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_pack<T: sealed::VectorPack<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_pack(b)
+}
+
+/// Vector Pack Saturated
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs<T: sealed::VectorPacks<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packs(b)
+}
+
+/// Vector Pack Saturated Unsigned
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu<T: sealed::VectorPacksu<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packsu(b)
+}
+
 /// Merges the least significant ("low") halves of two vectors.
 #[inline]
 #[target_feature(enable = "vector")]
@@ -2952,6 +3105,11 @@ mod tests {
         assert_eq!(ShuffleMask::<4>::merge_high().0, [0, 4, 1, 5]);
     }
 
+    #[test]
+    fn pack_mask() {
+        assert_eq!(ShuffleMask::<4>::pack().0, [1, 3, 5, 7]);
+    }
+
     #[test]
     fn test_vec_mask() {
         assert_eq!(
@@ -3718,4 +3876,28 @@ mod tests {
         assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8);
         assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16);
     }
+
+    test_vec_2! { test_vec_pack, vec_pack, i16x8, i16x8 -> i8x16,
+        [0, 1, -1, 42, 32767, -32768, 30000, -30000],
+        [32767, -32768, 12345, -12345, 0, 1, -1, 42],
+        [0, 1, -1, 42, -1, 0, 48, -48, -1, 0, 57, -57, 0, 1, -1, 42]
+    }
+
+    test_vec_2! { test_vec_packs, vec_packs, i16x8, i16x8 -> i8x16,
+        [0, 1, -1, 42, 32767, -32768, 30000, -30000],
+        [32767, -32768, 12345, -12345, 0, 1, -1, 42],
+        [0, 1, -1, 42, 127, -128, 127, -128, 127, -128, 127, -128, 0, 1, -1, 42]
+    }
+
+    test_vec_2! { test_vec_packsu_signed, vec_packsu, i16x8, i16x8 -> u8x16,
+        [0, 1, -1, 42, 32767, -32768, 30000, -30000],
+        [32767, -32768, 12345, -12345, 0, 1, -1, 42],
+        [0, 1, 0, 42, 255, 0, 255, 0, 255, 0, 255, 0, 0, 1, 0, 42]
+    }
+
+    test_vec_2! { test_vec_packsu_unsigned, vec_packsu, u16x8, u16x8 -> u8x16,
+        [65535, 32768, 1234, 5678, 16, 8, 4, 2],
+        [30000, 25000, 20000, 15000, 31, 63, 127, 255],
+        [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255]
+    }
 }

From 8870be21558354bdd31e746ddb3fa5845e4f833f Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 1 Mar 2025 16:25:05 +0100
Subject: [PATCH 13/57] add `vec_packs_cc` and `vec_packsu_cc`

---
 crates/core_arch/src/s390x/vector.rs | 110 +++++++++++++++++++++++++--
 1 file changed, 104 insertions(+), 6 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 2d5e696b04..65f62e50e0 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -159,6 +159,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char;
     #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int;
+
+    #[link_name = "llvm.s390.vpkshs"] fn vpkshs(a: vector_signed_short, b: vector_signed_short) -> PackedTuple<vector_signed_char, i32>;
+    #[link_name = "llvm.s390.vpksfs"] fn vpksfs(a: vector_signed_int, b: vector_signed_int) -> PackedTuple<vector_signed_short, i32>;
+    #[link_name = "llvm.s390.vpksgs"] fn vpksgs(a: vector_signed_long_long, b: vector_signed_long_long) -> PackedTuple<vector_signed_int, i32>;
+
+    #[link_name = "llvm.s390.vpklshs"] fn vpklshs(a: vector_unsigned_short, b: vector_unsigned_short) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
+    #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -1839,13 +1847,13 @@ mod sealed {
      }
 
     vfaes_wrapper! {
-       vfaebs vector_signed_char
-       vfaehs vector_signed_short
-       vfaefs vector_signed_int
+        vfaebs vector_signed_char
+        vfaehs vector_signed_short
+        vfaefs vector_signed_int
 
-       vfaezbs vector_signed_char
-       vfaezhs vector_signed_short
-       vfaezfs vector_signed_int
+        vfaezbs vector_signed_char
+        vfaezhs vector_signed_short
+        vfaezfs vector_signed_int
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -2138,6 +2146,80 @@ mod sealed {
     impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short }
     impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int }
     impl_vec_trait! { [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
+
+    macro_rules! impl_vector_packs_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[inline]
+                #[target_feature(enable = "vector")]
+                #[cfg_attr(test, assert_instr($intr))]
+                unsafe fn $intr(
+                    a: $ty,
+                    b: $ty,
+                    c: *mut i32,
+                ) -> $outty {
+                    let PackedTuple { x, y } = super::$intr(a, b);
+                    c.write(y);
+                    x
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packs_cc(self, b: Self, c: *mut i32) -> Self::Result {
+                        $intr(self, b, c)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksCC {
+        type Result;
+        unsafe fn vec_packs_cc(self, b: Self, c: *mut i32) -> Self::Result;
+    }
+
+    impl_vector_packs_cc! {
+        vpkshs vector_signed_short vector_signed_char
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpksfs vector_signed_int vector_signed_short
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpksgs vector_signed_long_long vector_signed_int
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
+
+    macro_rules! impl_vector_packsu_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksuCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packsu_cc(self, b: Self, c: *mut i32) -> Self::Result {
+                        $intr(self, b, c)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksuCC {
+        type Result;
+        unsafe fn vec_packsu_cc(self, b: Self, c: *mut i32) -> Self::Result;
+    }
+
+    impl_vector_packsu_cc! {
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
 }
 
 /// Load Count to Block Boundary
@@ -2689,6 +2771,14 @@ pub unsafe fn vec_packs<T: sealed::VectorPacks<U>, U>(a: T, b: U) -> T::Result {
     a.vec_packs(b)
 }
 
+/// Vector Pack Saturated Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs_cc<T: sealed::VectorPacksCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    a.vec_packs_cc(b, c)
+}
+
 /// Vector Pack Saturated Unsigned
 #[inline]
 #[target_feature(enable = "vector")]
@@ -2697,6 +2787,14 @@ pub unsafe fn vec_packsu<T: sealed::VectorPacksu<U>, U>(a: T, b: U) -> T::Result
     a.vec_packsu(b)
 }
 
+/// Vector Pack Saturated Unsigned Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu_cc<T: sealed::VectorPacksuCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    a.vec_packsu_cc(b, c)
+}
+
 /// Merges the least significant ("low") halves of two vectors.
 #[inline]
 #[target_feature(enable = "vector")]

From d4ba3d2f30456995103bf11a598303a8bb6175a4 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 2 Mar 2025 00:02:11 +0100
Subject: [PATCH 14/57] add `vec_madd` and `vec_msub`

---
 crates/core_arch/src/s390x/vector.rs | 63 ++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 65f62e50e0..34839e55fe 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2220,6 +2220,53 @@ mod sealed {
         vpklsfs vector_unsigned_int vector_unsigned_short
         vpklsgs vector_unsigned_long_long vector_unsigned_int
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMadd {
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self;
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self;
+    }
+
+    test_impl! { vfmasb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fma, "vector-enhancements-1" vfmasb] }
+    test_impl! { vfmadb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fma, vfmadb] }
+
+    #[inline]
+    unsafe fn simd_fms<T>(a: T, b: T, c: T) -> T {
+        simd_fma(a, b, simd_neg(c))
+    }
+
+    test_impl! { vfmssb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fms, "vector-enhancements-1" vfmssb] }
+    test_impl! { vfmsdb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fms, vfmsdb] }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_float {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmasb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmssb(self, b, c)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_double {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmadb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmsdb(self, b, c)
+        }
+    }
 }
 
 /// Load Count to Block Boundary
@@ -3179,6 +3226,22 @@ pub unsafe fn vec_store_len_r(vector: vector_unsigned_char, ptr: *mut u8, byte_c
     vstrl(vector, byte_count, ptr)
 }
 
+/// Vector Multiply Add (`a * b + c`, element-wise)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_madd<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
+    a.vec_madd(b, c)
+}
+
+/// Vector Multiply Subtract (`a * b - c`, element-wise)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_msub<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
+    a.vec_msub(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 64ac60be4b64e659a5feab6573aaeb01d2260f89 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 2 Mar 2025 17:53:07 +0100
Subject: [PATCH 15/57] test `vec_rl`

---
 crates/core_arch/src/s390x/vector.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 34839e55fe..da83a1f7d1 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3647,11 +3647,6 @@ mod tests {
     [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16],
     [-4, -2, -1, -8] }
 
-    test_vec_2! { test_vec_rl, vec_rl, u32x4,
-    [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678],
-    [4, 8, 12, 68],
-    [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] }
-
     test_vec_1! { test_vec_reve_f32, vec_reve, f32x4,
         [0.1, 0.5, 0.6, 0.9],
         [0.9, 0.6, 0.5, 0.1]
@@ -4061,4 +4056,9 @@ mod tests {
         [30000, 25000, 20000, 15000, 31, 63, 127, 255],
         [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255]
     }
+
+    test_vec_2! { test_vec_rl, vec_rl, u32x4,
+    [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678],
+    [4, 8, 12, 68],
+    [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] }
 }

From b5443d680ca526228bc7fcf51da0d9226506419c Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 3 Mar 2025 19:01:01 +0100
Subject: [PATCH 16/57] correct name of signed splat functions

---
 crates/core_arch/src/s390x/vector.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index da83a1f7d1..c3a191f2c2 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3034,7 +3034,7 @@ pub unsafe fn vec_subec_u128(
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 #[cfg_attr(test, assert_instr(vrepib, IMM = 42))]
-pub unsafe fn vec_splat_i8<const IMM: i8>() -> vector_signed_char {
+pub unsafe fn vec_splat_s8<const IMM: i8>() -> vector_signed_char {
     vector_signed_char([IMM; 16])
 }
 
@@ -3043,7 +3043,7 @@ pub unsafe fn vec_splat_i8<const IMM: i8>() -> vector_signed_char {
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 #[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
-pub unsafe fn vec_splat_i16<const IMM: i16>() -> vector_signed_short {
+pub unsafe fn vec_splat_s16<const IMM: i16>() -> vector_signed_short {
     vector_signed_short([IMM as i16; 8])
 }
 
@@ -3052,7 +3052,7 @@ pub unsafe fn vec_splat_i16<const IMM: i16>() -> vector_signed_short {
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 #[cfg_attr(test, assert_instr(vrepif, IMM = 42))]
-pub unsafe fn vec_splat_i32<const IMM: i16>() -> vector_signed_int {
+pub unsafe fn vec_splat_s32<const IMM: i16>() -> vector_signed_int {
     vector_signed_int([IMM as i32; 4])
 }
 
@@ -3061,7 +3061,7 @@ pub unsafe fn vec_splat_i32<const IMM: i16>() -> vector_signed_int {
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 #[cfg_attr(test, assert_instr(vrepig, IMM = 42))]
-pub unsafe fn vec_splat_i64<const IMM: i16>() -> vector_signed_long_long {
+pub unsafe fn vec_splat_s64<const IMM: i16>() -> vector_signed_long_long {
     vector_signed_long_long([IMM as i64; 2])
 }
 

From 8a1e79eba3eed3256b9260b34f74afa7756ef699 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 3 Mar 2025 19:53:50 +0100
Subject: [PATCH 17/57] add `vec_unpackh` and `vec_unpackl`

---
 crates/core_arch/src/s390x/vector.rs | 132 ++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 3 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index c3a191f2c2..84f83423d2 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -167,6 +167,13 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vpklshs"] fn vpklshs(a: vector_unsigned_short, b: vector_unsigned_short) -> PackedTuple<vector_unsigned_char, i32>;
     #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
     #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
+
+    #[link_name = "llvm.s390.vuplbw"] fn vuplbw (a: vector_signed_char) -> vector_signed_short;
+    #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int;
+    #[link_name = "llvm.s390.vuplfw"] fn vuplfw (a: vector_signed_int) -> vector_signed_long_long;
+    #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -232,6 +239,26 @@ impl<const N: usize> ShuffleMask<N> {
         }
         ShuffleMask(mask)
     }
+
+    const fn unpack_low() -> Self {
+        let mut mask = [0; N];
+        let mut i = 0;
+        while i < N {
+            mask[i] = (N + i) as u32;
+            i += 1;
+        }
+        ShuffleMask(mask)
+    }
+
+    const fn unpack_high() -> Self {
+        let mut mask = [0; N];
+        let mut i = 0;
+        while i < N {
+            mask[i] = i as u32;
+            i += 1;
+        }
+        ShuffleMask(mask)
+    }
 }
 
 const fn genmask<const MASK: u16>() -> [u8; 16] {
@@ -2267,6 +2294,68 @@ mod sealed {
             vfmsdb(self, b, c)
         }
     }
+
+    macro_rules! impl_vec_unpack {
+        ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => {
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[cfg_attr(test, assert_instr($instr))]
+            unsafe fn $instr(a: $src) -> $dst {
+                simd_as(simd_shuffle::<_, _, $shuffled>(
+                    a,
+                    a,
+                    const { ShuffleMask::<$width>::$mask() },
+                ))
+            }
+        };
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorUnpackh {
+        type Result;
+        unsafe fn vec_unpackh(self) -> Self::Result;
+    }
+
+    impl_vec_unpack!(unpack_high vuphb vector_signed_char i8x8 vector_signed_short 8);
+    impl_vec_unpack!(unpack_high vuphh vector_signed_short i16x4 vector_signed_int 4);
+    impl_vec_unpack!(unpack_high vuphf vector_signed_int i32x2 vector_signed_long_long 2);
+
+    impl_vec_unpack!(unpack_high vuplhb vector_unsigned_char u8x8 vector_unsigned_short 8);
+    impl_vec_unpack!(unpack_high vuplhh vector_unsigned_short u16x4 vector_unsigned_int 4);
+    impl_vec_unpack!(unpack_high vuplhf vector_unsigned_int u32x2 vector_unsigned_long_long 2);
+
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphb (vector_signed_char) -> vector_signed_short}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphh (vector_signed_short) -> vector_signed_int}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphf (vector_signed_int) -> vector_signed_long_long}
+
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhb (vector_unsigned_char) -> vector_unsigned_short}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhh (vector_unsigned_short) -> vector_unsigned_int}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhf (vector_unsigned_int) -> vector_unsigned_long_long}
+
+    impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhb (vector_bool_char) -> vector_bool_short}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhh (vector_bool_short) -> vector_bool_int}
+    impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhf (vector_bool_int) -> vector_bool_long_long}
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorUnpackl {
+        type Result;
+        unsafe fn vec_unpackl(self) -> Self::Result;
+    }
+
+    // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like
+    // `vec_unpackh` above. Tracked in https://github.com/llvm/llvm-project/issues/129576.
+
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long}
+
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllf (vector_unsigned_int) -> vector_unsigned_long_long}
+
+    impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllb (vector_bool_char) -> vector_bool_short}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllh (vector_bool_short) -> vector_bool_int}
+    impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllf (vector_bool_int) -> vector_bool_long_long}
 }
 
 /// Load Count to Block Boundary
@@ -2842,6 +2931,22 @@ pub unsafe fn vec_packsu_cc<T: sealed::VectorPacksuCC>(a: T, b: T, c: *mut i32)
     a.vec_packsu_cc(b, c)
 }
 
+/// Vector Unpack High
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackh<T: sealed::VectorUnpackh>(a: T) -> <T as sealed::VectorUnpackh>::Result {
+    a.vec_unpackh()
+}
+
+/// Vector Unpack Low
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackl<T: sealed::VectorUnpackl>(a: T) -> <T as sealed::VectorUnpackl>::Result {
+    a.vec_unpackl()
+}
+
 /// Merges the least significant ("low") halves of two vectors.
 #[inline]
 #[target_feature(enable = "vector")]
@@ -4058,7 +4163,28 @@ mod tests {
     }
 
     test_vec_2! { test_vec_rl, vec_rl, u32x4,
-    [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678],
-    [4, 8, 12, 68],
-    [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] }
+        [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678],
+        [4, 8, 12, 68],
+        [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781]
+    }
+
+    test_vec_1! { test_vec_unpackh_i, vec_unpackh, i16x8 -> i32x4,
+        [0x1234, -2, 0x0F0F, -32768, 0, 0, 0, 0],
+        [0x1234, -2, 0x0F0F, -32768]
+    }
+
+    test_vec_1! { test_vec_unpackh_u, vec_unpackh, u16x8 -> u32x4,
+        [0x1234, 0xFFFF, 0x0F0F, 0x8000, 0, 0, 0, 0],
+        [0x1234, 0xFFFF, 0x0F0F, 0x8000]
+    }
+
+    test_vec_1! { test_vec_unpackl_i, vec_unpackl, i16x8 -> i32x4,
+        [0, 0, 0, 0, 0x1234, -2, 0x0F0F, -32768],
+        [0x1234, -2, 0x0F0F, -32768]
+    }
+
+    test_vec_1! { test_vec_unpackl_u, vec_unpackl, u16x8 -> u32x4,
+        [0, 0, 0, 0, 0x1234, 0xFFFF, 0x0F0F, 0x8000],
+        [0x1234, 0xFFFF, 0x0F0F, 0x8000]
+    }
 }

From ef8f79ed2b1427071b4747841f3dc4ac7caab29a Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 3 Mar 2025 22:22:08 +0100
Subject: [PATCH 18/57] add `vec_avg`

---
 crates/core_arch/src/s390x/vector.rs | 42 ++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 84f83423d2..6a10995096 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -174,6 +174,16 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
+    #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
+    #[link_name = "llvm.s390.vavgg"] fn vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long;
+
+    #[link_name = "llvm.s390.vavglb"] fn vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vavglh"] fn vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vavglf"] fn vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2356,6 +2366,24 @@ mod sealed {
     impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllb (vector_bool_char) -> vector_bool_short}
     impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllh (vector_bool_short) -> vector_bool_int}
     impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllf (vector_bool_int) -> vector_bool_long_long}
+
+    test_impl! { vec_vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgb, vavgb ] }
+    test_impl! { vec_vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vavgh, vavgh ] }
+    test_impl! { vec_vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vavgf, vavgf ] }
+    test_impl! { vec_vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [ vavgg, vavgg ] }
+
+    test_impl! { vec_vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vavglb, vavglb ] }
+    test_impl! { vec_vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vavglh, vavglh ] }
+    test_impl! { vec_vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vavglf, vavglf ] }
+    test_impl! { vec_vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [ vavglg, vavglg ] }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorAvg<Other> {
+        type Result;
+        unsafe fn vec_avg(self, b: Other) -> Self::Result;
+    }
+
+    impl_vec_trait! { [VectorAvg vec_avg] 2 (vec_vavglb, vec_vavgb, vec_vavglh, vec_vavgh, vec_vavglf, vec_vavgf, vec_vavglg, vec_vavgg) }
 }
 
 /// Load Count to Block Boundary
@@ -2732,6 +2760,14 @@ where
     a.vec_rint()
 }
 
+/// Vector Average
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_avg<T: sealed::VectorAvg<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_avg(b)
+}
+
 /// Vector Shift Left
 #[inline]
 #[target_feature(enable = "vector")]
@@ -4187,4 +4223,10 @@ mod tests {
         [0, 0, 0, 0, 0x1234, 0xFFFF, 0x0F0F, 0x8000],
         [0x1234, 0xFFFF, 0x0F0F, 0x8000]
     }
+
+    test_vec_2! { test_vec_avg, vec_avg, u32x4,
+        [2, 1, u32::MAX, 0],
+        [4, 2, 2, 0],
+        [3, (1u32 + 2).div_ceil(2), (u32::MAX as u64 + 2u64).div_ceil(2) as u32, 0]
+    }
 }

From 0dab41523b52585140a1f94d94b629d49c5712d0 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Mon, 3 Mar 2025 22:28:51 +0100
Subject: [PATCH 19/57] add `vec_checksum`

---
 crates/core_arch/src/s390x/vector.rs | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 6a10995096..8527f986e3 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -184,6 +184,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vavglh"] fn vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vavglf"] fn vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3383,6 +3385,15 @@ pub unsafe fn vec_msub<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
     a.vec_msub(b, c)
 }
 
+/// Vector Checksum
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vcksm))]
+pub unsafe fn vec_checksum(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int {
+    vcksm(a, b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4229,4 +4240,10 @@ mod tests {
         [4, 2, 2, 0],
         [3, (1u32 + 2).div_ceil(2), (u32::MAX as u64 + 2u64).div_ceil(2) as u32, 0]
     }
+
+    test_vec_2! { test_vec_checksum, vec_checksum, u32x4,
+        [1, 2, 3, u32::MAX],
+        [5, 6, 7, 8],
+        [0, 12, 0, 0]
+    }
 }

From ad4d6937663ba40413a875e08471db629a370228 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 13:28:48 +0100
Subject: [PATCH 20/57] add `vec_add_u128`, `vec_addc_u128`, `vec_adde_u128`
 and `vec_addec_u128`

---
 crates/core_arch/src/lib.rs          |   3 +-
 crates/core_arch/src/s390x/vector.rs | 120 +++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs
index 16e52d2d9d..b8f4c2625e 100644
--- a/crates/core_arch/src/lib.rs
+++ b/crates/core_arch/src/lib.rs
@@ -38,7 +38,8 @@
     x86_amx_intrinsics,
     f16,
     keylocker_x86,
-    aarch64_unstable_target_feature
+    aarch64_unstable_target_feature,
+    bigint_helper_methods
 )]
 #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
 #![deny(clippy::missing_inline_in_public_items)]
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 8527f986e3..112f86a3e8 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -122,6 +122,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vsbiq"] fn vsbiq(a: u128, b: u128, c: u128) -> u128;
     #[link_name = "llvm.s390.vsbcbiq"] fn vsbcbiq(a: u128, b: u128, c: u128) -> u128;
 
+    #[link_name = "llvm.s390.vacq"] fn vacq(a: u128, b: u128, c: u128) -> u128;
+
     #[link_name = "llvm.s390.vscbib"] fn vscbib(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
     #[link_name = "llvm.s390.vscbih"] fn vscbih(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vscbif"] fn vscbif(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
@@ -3085,6 +3087,24 @@ pub unsafe fn vec_sum4<T: sealed::VectorSum4>(a: T, b: T) -> vector_unsigned_int
     a.vec_sum4(b)
 }
 
+/// Vector Addition unsigned 128-bits
+///
+/// Adds unsigned quadword values.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns low 128 bits of a + b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaq))]
+pub unsafe fn vec_add_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.wrapping_add(b))
+}
+
 /// Vector Subtract unsigned 128-bits
 ///
 /// Subtracts unsigned quadword values.
@@ -3130,9 +3150,63 @@ pub unsafe fn vec_subc_u128(
     a: vector_unsigned_char,
     b: vector_unsigned_char,
 ) -> vector_unsigned_char {
+    // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608
+    // let a: u128 = transmute(a);
+    // let b: u128 = transmute(b);
+    // transmute(!a.overflowing_sub(b).1 as u128)
     transmute(vscbiq(transmute(a), transmute(b)))
 }
 
+/// Vector Add Compute Carryout unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaccq))]
+pub unsafe fn vec_addc_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.overflowing_add(b).1 as u128)
+}
+
+/// Vector Add With Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacq))]
+pub unsafe fn vec_adde_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    // FIXME(llvm) sadly this does not work
+    //     let (d, _carry) = a.carrying_add(b, c & 1 != 0);
+    //     transmute(d)
+    transmute(vacq(a, b, c))
+}
+
+/// Vector Add With Carry Compute Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacccq))]
+pub unsafe fn vec_addec_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    let (_d, carry) = a.carrying_add(b, c & 1 != 0);
+    transmute(carry as u128)
+}
+
 /// Subtracts unsigned quadword values with carry bit from a previous operation.
 ///
 /// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting of b from a,
@@ -4246,4 +4320,50 @@ mod tests {
         [5, 6, 7, 8],
         [0, 12, 0, 0]
     }
+
+    test_vec_2! { test_vec_add_u128, vec_add_u128, u8x16,
+        [0x01, 0x05, 0x0F, 0x1A, 0x2F, 0x3F, 0x50, 0x65,
+                              0x7A, 0x8F, 0x9A, 0xAD, 0xB0, 0xC3, 0xD5, 0xE8],
+        [0xF0, 0xEF, 0xC3, 0xB1, 0x92, 0x71, 0x5A, 0x43,
+                              0x3B, 0x29, 0x13, 0x04, 0xD7, 0xA1, 0x8C, 0x76],
+        [0xF1, 0xF4, 0xD2, 0xCB, 0xC1, 0xB0, 0xAA, 0xA8, 0xB5, 0xB8, 0xAD, 0xB2, 0x88, 0x65, 0x62, 0x5E]
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_addc_u128() {
+        unsafe {
+            let a = u128::MAX;
+            let b = 1u128;
+
+            let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b)));
+            assert!(a.checked_add(b).is_none());
+            assert_eq!(d, 1);
+
+            let a = 1u128;
+            let b = 1u128;
+
+            let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b)));
+            assert!(a.checked_add(b).is_some());
+            assert_eq!(d, 0);
+        }
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_subc_u128() {
+        unsafe {
+            let a = 0u128;
+            let b = 1u128;
+
+            let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b)));
+            assert!(a.checked_sub(b).is_none());
+            assert_eq!(d, 0);
+
+            let a = 1u128;
+            let b = 1u128;
+
+            let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b)));
+            assert!(a.checked_sub(b).is_some());
+            assert_eq!(d, 1);
+        }
+    }
 }

From e61e147160f0738fa17eeb57c1fe821108626021 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 14:45:59 +0100
Subject: [PATCH 21/57] add `vec_mule`

---
 crates/core_arch/src/s390x/vector.rs | 81 ++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 112f86a3e8..5bf4b12e98 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -188,6 +188,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
 
     #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
+
+    #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
+    #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
+
+    #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2388,6 +2396,59 @@ mod sealed {
     }
 
     impl_vec_trait! { [VectorAvg vec_avg] 2 (vec_vavglb, vec_vavgb, vec_vavglh, vec_vavgh, vec_vavglf, vec_vavgf, vec_vavglg, vec_vavgg) }
+
+    macro_rules! impl_mul {
+        ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => {
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            impl $Trait<$r> for $a {
+                #[inline]
+                #[target_feature(enable = "vector")]
+                unsafe fn $m(self, b: $b) -> $r {
+                    $fun(transmute(self), transmute(b))
+                }
+            }
+        };
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMule<Result> {
+        unsafe fn vec_mule(self, b: Self) -> Result;
+    }
+
+    // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705
+    //    #[target_feature(enable = "vector")]
+    //    #[cfg_attr(test, assert_instr(vmleh))]
+    //    unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int {
+    //        let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
+    //            a,
+    //            a,
+    //            const { ShuffleMask([0, 2, 4, 6]) },
+    //        ));
+    //
+    //        let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
+    //            b,
+    //            b,
+    //            const { ShuffleMask([0, 2, 4, 6]) },
+    //        ));
+    //
+    //        simd_mul(even_a, even_b)
+    //    }
+
+    test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] }
+    test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] }
+    test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] }
+
+    test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] }
+    test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] }
+    test_impl! { vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] }
+
+    impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short );
+    impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int);
+    impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
+
+    impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
+    impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
+    impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 }
 
 /// Load Count to Block Boundary
@@ -3468,6 +3529,14 @@ pub unsafe fn vec_checksum(a: vector_unsigned_int, b: vector_unsigned_int) -> ve
     vcksm(a, b)
 }
 
+/// Vector Multiply Even
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mule<T: sealed::VectorMule<U>, U>(a: T, b: T) -> U {
+    a.vec_mule(b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4366,4 +4435,16 @@ mod tests {
             assert_eq!(d, 1);
         }
     }
+
+    test_vec_2! { test_vec_mule_u, vec_mule, u16x8, u16x8 -> u32x4,
+        [0xFFFF, 0, 2, 0, 2, 0, 1, 0],
+        [0xFFFF, 0, 4, 0, 0xFFFF, 0, 2, 0],
+        [0xFFFE_0001, 8, 0x0001_FFFE, 2]
+    }
+
+    test_vec_2! { test_vec_mule_i, vec_mule, i16x8, i16x8 -> i32x4,
+        [i16::MIN, 0, -2, 0, 2, 0, 1, 0],
+        [i16::MIN, 0, 4, 0, i16::MAX, 0, 2, 0],
+        [0x4000_0000, -8, 0xFFFE, 2]
+    }
 }

From ab66d1448117ef2ce1c84b9400d81c58f3f045a7 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 15:10:10 +0100
Subject: [PATCH 22/57] add `vec_nmsub`

---
 crates/core_arch/src/s390x/vector.rs | 54 +++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 5bf4b12e98..d90ad6457b 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -649,6 +649,47 @@ mod sealed {
     impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f32 (vector_float) }
     impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f64 (vector_double) }
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorNmsub {
+        unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self;
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vfnmssb)
+    )]
+    unsafe fn vec_nmsub_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+        simd_neg(simd_fma(a, b, simd_neg(c)))
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorNmsub for vector_float {
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self {
+            vec_nmsub_f32(self, b, c)
+        }
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vfnmsdb)
+    )]
+    unsafe fn vec_nmsub_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double {
+        simd_neg(simd_fma(a, b, simd_neg(c)))
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorNmsub for vector_double {
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self {
+            vec_nmsub_f64(self, b, c)
+        }
+    }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorSplat {
         unsafe fn vec_splat<const IMM: u32>(self) -> Self;
@@ -2571,13 +2612,18 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_nabs<T>(a: T) -> T
-where
-    T: sealed::VectorNabs,
-{
+pub unsafe fn vec_nabs<T: sealed::VectorNabs>(a: T) -> T {
     a.vec_nabs()
 }
 
+/// Vector Negative Multiply Subtract
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmsub<T: sealed::VectorNmsub>(a: T, b: T, c: T) -> T {
+    a.vec_nmsub(b, c)
+}
+
 /// Vector square root.
 #[inline]
 #[target_feature(enable = "vector")]

From 6956f2abe7ecdb8fa579ac5a91b9cb41f30ced68 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 16:00:13 +0100
Subject: [PATCH 23/57] clarify fixme waiting for a newer llvm version

---
 crates/core_arch/src/s390x/vector.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index d90ad6457b..d43ca01c57 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -1009,7 +1009,8 @@ mod sealed {
     test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32,  "vector-enhancements-1" vfisb] }
     test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
 
-    // FIXME(llvm) roundeven does not yet lower to vfidb (but should in the future)
+    // FIXME(llvm) LLVM trunk already lowers roundeven to vfidb, but rustc does not use that LLVM yet
+    // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works
     test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
     test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
 

From 58fa7a7e05bd05c7c72b1e9bed6ce0892fc40e70 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 16:20:04 +0100
Subject: [PATCH 24/57] add `vec_gfmsum`

---
 crates/core_arch/src/s390x/vector.rs | 37 ++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index d43ca01c57..ae9541b2b6 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -196,6 +196,10 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2491,6 +2495,19 @@ mod sealed {
     impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
     impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
     impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorGfmsum<Result> {
+        unsafe fn vec_gfmsum(self, b: Self) -> Result;
+    }
+
+    test_impl! { vec_vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vgfmb, vgfmb ] }
+    test_impl! { vec_vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vgfmh, vgfmh] }
+    test_impl! { vec_vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vgfmf, vgfmf ] }
+
+    impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
+    impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
+    impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 }
 
 /// Load Count to Block Boundary
@@ -3584,6 +3601,14 @@ pub unsafe fn vec_mule<T: sealed::VectorMule<U>, U>(a: T, b: T) -> U {
     a.vec_mule(b)
 }
 
+/// Vector Galois Field Multiply Sum
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_gfmsum<T: sealed::VectorGfmsum<U>, U>(a: T, b: T) -> U {
+    a.vec_gfmsum(b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4494,4 +4519,16 @@ mod tests {
         [i16::MIN, 0, 4, 0, i16::MAX, 0, 2, 0],
         [0x4000_0000, -8, 0xFFFE, 2]
     }
+
+    test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4,
+        [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0],
+        [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678],
+        [0xE13A794, 0x68764A50, 0x94AA3E, 0x2C93F300]
+    }
+
+    test_vec_2! { test_vec_gfmsum_2, vec_gfmsum, u16x8, u16x8 -> u32x4,
+        [0x0000, 0xFFFF, 0xAAAA, 0x5555, 0x1234, 0x5678, 0x9ABC, 0xDEF0],
+        [0xFFFF, 0x0000, 0x5555, 0xAAAA, 0x0001, 0x8000, 0x7FFF, 0x1357],
+        [0, 0, 0x2B3C1234, 0x3781D244]
+    }
 }

From 75e9c9e21f5ceebd961bc384c9fefd120435079e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 16:44:58 +0100
Subject: [PATCH 25/57] add `vec_gfmsum_128`

---
 crates/core_arch/src/s390x/vector.rs | 29 ++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index ae9541b2b6..907627f59e 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -200,6 +200,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
+    #[link_name = "llvm.s390.vgfmg"] fn vgfmg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128;
+
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3609,6 +3611,18 @@ pub unsafe fn vec_gfmsum<T: sealed::VectorGfmsum<U>, U>(a: T, b: T) -> U {
     a.vec_gfmsum(b)
 }
 
+/// Vector Galois Field Multiply Sum 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgfmg))]
+pub unsafe fn vec_gfmsum_128(
+    a: vector_unsigned_long_long,
+    b: vector_unsigned_long_long,
+) -> vector_unsigned_char {
+    transmute(vgfmg(a, b))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4531,4 +4545,19 @@ mod tests {
         [0xFFFF, 0x0000, 0x5555, 0xAAAA, 0x0001, 0x8000, 0x7FFF, 0x1357],
         [0, 0, 0x2B3C1234, 0x3781D244]
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_gfmsum_128() {
+        let a = vector_unsigned_long_long([1, 2]);
+        let b = vector_unsigned_long_long([3, 4]);
+
+        let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) };
+        assert_eq!(d, 11);
+
+        let a = vector_unsigned_long_long([0x0101010101010101, 0x0202020202020202]);
+        let b = vector_unsigned_long_long([0x0404040404040404, 0x0505050505050505]);
+
+        let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) };
+        assert_eq!(d, 0xE000E000E000E000E000E000E000E);
+    }
 }

From eba5adf7f122c82df8a2c35af0850e8caf34294d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 16:56:13 +0100
Subject: [PATCH 26/57] add `vec_gfmsum_accum` and `vec_gfmsum_accum_128`

---
 crates/core_arch/src/s390x/vector.rs | 67 ++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 907627f59e..d3d40c3b71 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -202,6 +202,10 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
     #[link_name = "llvm.s390.vgfmg"] fn vgfmg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128;
 
+    #[link_name = "llvm.s390.vgfmab"] fn vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vgfmah"] fn vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vgfmaf"] fn vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
+    #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2510,6 +2514,44 @@ mod sealed {
     impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
     impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
     impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorGfmsumAccum {
+        type Result;
+        unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result;
+    }
+
+    test_impl! { vec_vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vgfmab, vgfmab ] }
+    test_impl! { vec_vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vgfmah, vgfmah] }
+    test_impl! { vec_vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vgfmaf, vgfmaf ] }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorGfmsumAccum for vector_unsigned_char {
+        type Result = vector_unsigned_short;
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result {
+            vec_vgfmab(self, b, c)
+        }
+    }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorGfmsumAccum for vector_unsigned_short {
+        type Result = vector_unsigned_int;
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result {
+            vec_vgfmah(self, b, c)
+        }
+    }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorGfmsumAccum for vector_unsigned_int {
+        type Result = vector_unsigned_long_long;
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result {
+            vec_vgfmaf(self, b, c)
+        }
+    }
 }
 
 /// Load Count to Block Boundary
@@ -3611,6 +3653,18 @@ pub unsafe fn vec_gfmsum<T: sealed::VectorGfmsum<U>, U>(a: T, b: T) -> U {
     a.vec_gfmsum(b)
 }
 
+/// Vector Galois Field Multiply Sum and Accumulate
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_gfmsum_accum<T: sealed::VectorGfmsumAccum>(
+    a: T,
+    b: T,
+    c: T::Result,
+) -> T::Result {
+    a.vec_gfmsum_accum(b, c)
+}
+
 /// Vector Galois Field Multiply Sum 128-bits
 #[inline]
 #[target_feature(enable = "vector")]
@@ -3623,6 +3677,19 @@ pub unsafe fn vec_gfmsum_128(
     transmute(vgfmg(a, b))
 }
 
+/// Vector Galois Field Multiply Sum and Accumulate 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgfmag))]
+pub unsafe fn vec_gfmsum_accum_128(
+    a: vector_unsigned_long_long,
+    b: vector_unsigned_long_long,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    transmute(vgfmag(a, b, transmute(c)))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 3c1bdfe7289be35efa106507fa8119cbd0f1940f Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 18:19:10 +0100
Subject: [PATCH 27/57] add `vec_nmadd`

---
 crates/core_arch/src/s390x/vector.rs | 49 ++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index d3d40c3b71..3ec27b579c 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -700,6 +700,47 @@ mod sealed {
         }
     }
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorNmadd {
+        unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self;
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vfnmasb)
+    )]
+    unsafe fn vec_nmadd_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+        simd_neg(simd_fma(a, b, c))
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorNmadd for vector_float {
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self {
+            vec_nmadd_f32(self, b, c)
+        }
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vfnmadb)
+    )]
+    unsafe fn vec_nmadd_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double {
+        simd_neg(simd_fma(a, b, c))
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorNmadd for vector_double {
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self {
+            vec_nmadd_f64(self, b, c)
+        }
+    }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorSplat {
         unsafe fn vec_splat<const IMM: u32>(self) -> Self;
@@ -2678,6 +2719,14 @@ pub unsafe fn vec_nabs<T: sealed::VectorNabs>(a: T) -> T {
     a.vec_nabs()
 }
 
+/// Vector Negative Multiply Add
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmadd<T: sealed::VectorNmadd>(a: T, b: T, c: T) -> T {
+    a.vec_nmadd(b, c)
+}
+
 /// Vector Negative Multiply Subtract
 #[inline]
 #[target_feature(enable = "vector")]

From 31be70eac55b009ace329ce5e359725dea5b6776 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 20:15:42 +0100
Subject: [PATCH 28/57] add `vec_gather_element`

---
 crates/core_arch/src/s390x/macros.rs |  16 ++++
 crates/core_arch/src/s390x/vector.rs | 107 +++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)

diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs
index 2c40fedae2..bf2ef35705 100644
--- a/crates/core_arch/src/s390x/macros.rs
+++ b/crates/core_arch/src/s390x/macros.rs
@@ -250,6 +250,19 @@ macro_rules! l_t_t {
         u8
     };
 
+    (vector_bool_long_long ) => {
+        u64
+    };
+    (vector_bool_int ) => {
+        u32
+    };
+    (vector_bool_short ) => {
+        u16
+    };
+    (vector_bool_char ) => {
+        u8
+    };
+
     (vector_float) => {
         f32
     };
@@ -338,6 +351,9 @@ macro_rules! t_u {
     (vector_bool_int) => {
         vector_unsigned_int
     };
+    (vector_bool_long_long) => {
+        vector_unsigned_long_long
+    };
     (vector_unsigned_char) => {
         vector_unsigned_char
     };
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 3ec27b579c..761660ee32 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2593,6 +2593,78 @@ mod sealed {
             vec_vgfmaf(self, b, c)
         }
     }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vgef, D = 3))]
+    unsafe fn vgef<const D: u32>(
+        a: vector_unsigned_int,
+        b: vector_unsigned_int,
+        c: *const u32,
+    ) -> vector_unsigned_int {
+        static_assert_uimm_bits!(D, 2);
+        let offset: u32 = simd_extract(b, D);
+        let ptr = c.byte_offset(offset as isize);
+        let value = ptr.read();
+        simd_insert(a, D, value)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vgeg, D = 1))]
+    unsafe fn vgeg<const D: u32>(
+        a: vector_unsigned_long_long,
+        b: vector_unsigned_long_long,
+        c: *const u64,
+    ) -> vector_unsigned_long_long {
+        static_assert_uimm_bits!(D, 1);
+        let offset: u64 = simd_extract(b, D);
+        let ptr = c.byte_offset(offset as isize);
+        let value = ptr.read();
+        simd_insert(a, D, value)
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorGatherElement {
+        type Element;
+        type Offset;
+        unsafe fn vec_gather_element<const D: u32>(
+            self,
+            b: Self::Offset,
+            c: *const Self::Element,
+        ) -> Self;
+    }
+
+    macro_rules! impl_vec_gather_element {
+        ($($instr:ident $ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorGatherElement for $ty {
+                    type Element = l_t_t!($ty);
+                    type Offset = t_u!($ty);
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_gather_element<const D: u32>(self, b: Self::Offset, c: *const Self::Element) -> Self {
+                        transmute($instr::<D>(transmute(self), b, c.cast()))
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_gather_element! {
+        vgef vector_signed_int
+        vgef vector_bool_int
+        vgef vector_unsigned_int
+
+        vgeg vector_signed_long_long
+        vgeg vector_bool_long_long
+        vgeg vector_unsigned_long_long
+
+        vgef vector_float
+        vgeg vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -3739,6 +3811,17 @@ pub unsafe fn vec_gfmsum_accum_128(
     transmute(vgfmag(a, b, transmute(c)))
 }
 
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_gather_element<T: sealed::VectorGatherElement, const D: u32>(
+    a: T,
+    b: T::Offset,
+    c: *const T::Element,
+) -> T {
+    a.vec_gather_element::<D>(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4676,4 +4759,28 @@ mod tests {
         let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) };
         assert_eq!(d, 0xE000E000E000E000E000E000E000E);
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_gather_element() {
+        let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19];
+        let a2: [u32; 10] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29];
+
+        let v1 = vector_unsigned_int([1, 2, 3, 4]);
+        let v2 = vector_unsigned_int([1, 2, 3, 4]);
+
+        let sizeof_int = core::mem::size_of::<u32>() as u32;
+        let v3 = vector_unsigned_int([
+            5 * sizeof_int,
+            8 * sizeof_int,
+            9 * sizeof_int,
+            6 * sizeof_int,
+        ]);
+
+        unsafe {
+            let d1 = vec_gather_element::<_, 0>(v1, v3, a1.as_ptr());
+            assert_eq!(d1.as_array(), &[15, 2, 3, 4]);
+            let d2 = vec_gather_element::<_, 0>(v2, v3, a2.as_ptr());
+            assert_eq!(d2.as_array(), &[25, 2, 3, 4]);
+        }
+    }
 }

From 7777e5c509f50bccf80328a318b6acdf5d9f9c48 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 20:25:26 +0100
Subject: [PATCH 29/57] add `vec_bperm_u128`

---
 crates/core_arch/src/s390x/vector.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 761660ee32..eb60812c54 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -206,6 +206,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vgfmah"] fn vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vgfmaf"] fn vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
     #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128;
+
+    #[link_name = "llvm.s390.vbperm"] fn vbperm(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_long_long;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3811,6 +3813,19 @@ pub unsafe fn vec_gfmsum_accum_128(
     transmute(vgfmag(a, b, transmute(c)))
 }
 
+/// Vector Bit Permute
+#[inline]
+#[target_feature(enable = "vector-enhancements-1")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vbperm))]
+pub unsafe fn vec_bperm_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_long_long {
+    vbperm(a, b)
+}
+
+/// Vector Gather Element
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -4760,6 +4775,16 @@ mod tests {
         assert_eq!(d, 0xE000E000E000E000E000E000E000E);
     }
 
+    #[simd_test(enable = "vector-enhancements-1")]
+    fn test_vec_bperm_u128() {
+        let a = vector_unsigned_char([65, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]);
+        let b = vector_unsigned_char([
+            0, 0, 0, 0, 1, 1, 1, 1, 128, 128, 128, 128, 255, 255, 255, 255,
+        ]);
+        let d = unsafe { vec_bperm_u128(a, b) };
+        assert_eq!(d.as_array(), &[0xF00, 0]);
+    }
+
     #[simd_test(enable = "vector")]
     fn test_vec_gather_element() {
         let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19];

From f3253592e0f72e3ae76c0bac40cff08efb8056b6 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 20:59:18 +0100
Subject: [PATCH 30/57] add `vec_sel`

---
 crates/core_arch/src/s390x/macros.rs |  3 ++
 crates/core_arch/src/s390x/vector.rs | 74 ++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs
index bf2ef35705..4f0f84ec91 100644
--- a/crates/core_arch/src/s390x/macros.rs
+++ b/crates/core_arch/src/s390x/macros.rs
@@ -396,6 +396,9 @@ macro_rules! t_b {
     (vector_bool_int) => {
         vector_bool_int
     };
+    (vector_bool_long_long) => {
+        vector_bool_long_long
+    };
     (vector_signed_char) => {
         vector_bool_char
     };
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index eb60812c54..f55c28428d 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2667,6 +2667,58 @@ mod sealed {
         vgef vector_float
         vgeg vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorSel<Mask>: Sized {
+        unsafe fn vec_sel(self, b: Self, c: Mask) -> Self;
+    }
+
+    macro_rules! impl_vec_sel {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorSel<t_u!($ty)> for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self {
+                        let b = simd_and(transmute(b), c); // bit math in the unsigned mask type: simd_and/xor/or reject float vectors
+                        let a = simd_and(transmute(self), simd_xor(c, transmute(vector_signed_char([!0; 16]))));
+                        transmute(simd_or(a, b)) // (self & !c) | (b & c), reinterpreted back as Self
+                    }
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorSel<t_b!($ty)> for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_sel(self, b: Self, c: t_b!($ty)) -> Self {
+                        // defer to the implementation with an unsigned mask
+                        self.vec_sel(b, transmute::<_, t_u!($ty)>(c))
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_sel! {
+        vector_signed_char
+        vector_signed_short
+        vector_signed_int
+        vector_signed_long_long
+
+        vector_unsigned_char
+        vector_unsigned_short
+        vector_unsigned_int
+        vector_unsigned_long_long
+
+        vector_bool_char
+        vector_bool_short
+        vector_bool_int
+        vector_bool_long_long
+
+        vector_float
+        vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -3837,6 +3889,14 @@ pub unsafe fn vec_gather_element<T: sealed::VectorGatherElement, const D: u32>(
     a.vec_gather_element::<D>(b, c)
 }
 
+/// Vector Select
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sel<T: sealed::VectorSel<U>, U>(a: T, b: T, c: U) -> T {
+    a.vec_sel(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4785,6 +4845,20 @@ mod tests {
         assert_eq!(d.as_array(), &[0xF00, 0]);
     }
 
+    #[simd_test(enable = "vector")]
+    fn test_vec_sel() {
+        let a = vector_signed_int([1, 2, 3, 4]);
+        let b = vector_signed_int([5, 6, 7, 8]);
+
+        let e = vector_unsigned_int([9, 10, 11, 12]);
+        let f = vector_unsigned_int([9, 9, 11, 11]);
+
+        let c: vector_bool_int = unsafe { simd_eq(e, f) };
+        assert_eq!(c.as_array(), &[!0, 0, !0, 0]);
+        let d: vector_signed_int = unsafe { vec_sel(a, b, c) };
+        assert_eq!(d.as_array(), &[5, 2, 7, 4]);
+    }
+
     #[simd_test(enable = "vector")]
     fn test_vec_gather_element() {
         let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19];

From a9a6405ffc568809caa5ae604de39bb9164ee79e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 4 Mar 2025 21:27:27 +0100
Subject: [PATCH 31/57] add `vec_scatter`

---
 crates/core_arch/src/s390x/vector.rs | 68 +++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index f55c28428d..098e018fa2 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2606,7 +2606,7 @@ mod sealed {
     ) -> vector_unsigned_int {
         static_assert_uimm_bits!(D, 2);
         let offset: u32 = simd_extract(b, D);
-        let ptr = c.byte_offset(offset as isize);
+        let ptr = c.byte_add(offset as usize);
         let value = ptr.read();
         simd_insert(a, D, value)
     }
@@ -2621,7 +2621,7 @@ mod sealed {
     ) -> vector_unsigned_long_long {
         static_assert_uimm_bits!(D, 1);
         let offset: u64 = simd_extract(b, D);
-        let ptr = c.byte_offset(offset as isize);
+        let ptr = c.byte_add(offset as usize);
         let value = ptr.read();
         simd_insert(a, D, value)
     }
@@ -2668,6 +2668,70 @@ mod sealed {
         vgeg vector_double
     }
 
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vscef, D = 3))]
+    unsafe fn vscef<const D: u32>(a: vector_unsigned_int, b: vector_unsigned_int, c: *mut u32) {
+        static_assert_uimm_bits!(D, 2);
+        let value = simd_extract(a, D);
+        let offset: u32 = simd_extract(b, D);
+        let ptr = c.byte_add(offset as usize);
+        ptr.write(value);
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vsceg, D = 1))]
+    unsafe fn vsceg<const D: u32>(
+        a: vector_unsigned_long_long,
+        b: vector_unsigned_long_long,
+        c: *mut u64,
+    ) {
+        static_assert_uimm_bits!(D, 1);
+        let value = simd_extract(a, D);
+        let offset: u64 = simd_extract(b, D);
+        let ptr = c.byte_add(offset as usize);
+        ptr.write(value);
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorScatterElement {
+        type Element;
+        type Offset;
+        unsafe fn vec_scatter_element<const D: u32>(self, b: Self::Offset, c: *mut Self::Element);
+    }
+
+    macro_rules! impl_vec_scatter_element {
+        ($($instr:ident $ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorScatterElement for $ty {
+                    type Element = l_t_t!($ty);
+                    type Offset = t_u!($ty);
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_scatter_element<const D: u32>(self, b: Self::Offset, c: *mut Self::Element) {
+                        $instr::<D>(transmute(self), b, c.cast())
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_scatter_element! {
+        vscef vector_signed_int
+        vscef vector_bool_int
+        vscef vector_unsigned_int
+
+        vsceg vector_signed_long_long
+        vsceg vector_bool_long_long
+        vsceg vector_unsigned_long_long
+
+        vscef vector_float
+        vsceg vector_double
+    }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorSel<Mask>: Sized {
         unsafe fn vec_sel(self, b: Self, c: Mask) -> Self;

From 25746d14bf5d73f82ad5dec251df78a467462bb3 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 12:44:01 +0100
Subject: [PATCH 32/57] add `vec_fp_test_data_class`

---
 crates/core_arch/src/s390x/vector.rs | 125 +++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 098e018fa2..7db2015964 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -208,6 +208,9 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128;
 
     #[link_name = "llvm.s390.vbperm"] fn vbperm(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vftcisb"] fn vftcisb(a: vector_float, b: u32) -> PackedTuple<vector_bool_int, i32>;
+    #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> PackedTuple<vector_bool_long_long, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2783,6 +2786,38 @@ mod sealed {
         vector_float
         vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorFpTestDataClass {
+        type Result;
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result;
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorFpTestDataClass for vector_float {
+        type Result = vector_bool_int;
+
+        #[inline]
+        #[target_feature(enable = "vector-enhancements-1")] // short-format `vftcisb` is not part of the base vector facility
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result {
+            let PackedTuple { x, y } = vftcisb(self, CLASS);
+            unsafe { ptr.write(y) }; // second tuple field is the i32 the intrinsic returns next to the mask
+            x
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorFpTestDataClass for vector_double {
+        type Result = vector_bool_long_long;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result {
+            let PackedTuple { x, y } = vftcidb(self, CLASS);
+            unsafe { ptr.write(y) };
+            x
+        }
+    }
 }
 
 /// Load Count to Block Boundary
@@ -3961,6 +3996,61 @@ pub unsafe fn vec_sel<T: sealed::VectorSel<U>, U>(a: T, b: T, c: U) -> T {
     a.vec_sel(b, c)
 }
 
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_ZERO_P: u32 = 1 << 11;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_ZERO_N: u32 = 1 << 10;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_ZERO: u32 = __VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_NORMAL_P: u32 = 1 << 9;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_NORMAL_N: u32 = 1 << 8;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_NORMAL: u32 = __VEC_CLASS_FP_NORMAL_P | __VEC_CLASS_FP_NORMAL_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SUBNORMAL_P: u32 = 1 << 7;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SUBNORMAL_N: u32 = 1 << 6;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SUBNORMAL: u32 = __VEC_CLASS_FP_SUBNORMAL_P | __VEC_CLASS_FP_SUBNORMAL_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_INFINITY_P: u32 = 1 << 5;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_INFINITY_N: u32 = 1 << 4;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_INFINITY: u32 = __VEC_CLASS_FP_INFINITY_P | __VEC_CLASS_FP_INFINITY_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_QNAN_P: u32 = 1 << 3;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_QNAN_N: u32 = 1 << 2;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_QNAN: u32 = __VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SNAN_P: u32 = 1 << 1;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SNAN_N: u32 = 1 << 0;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_SNAN: u32 = __VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_NAN: u32 = __VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN;
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub const __VEC_CLASS_FP_NOT_NORMAL: u32 =
+    __VEC_CLASS_FP_NAN | __VEC_CLASS_FP_SUBNORMAL | __VEC_CLASS_FP_ZERO | __VEC_CLASS_FP_INFINITY;
+
+/// Vector Floating-Point Test Data Class
+///
+/// `CLASS` is a bitwise OR of `__VEC_CLASS_FP_*` constants; the `i32` returned by the instruction is written to `c`.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_fp_test_data_class<T: sealed::VectorFpTestDataClass, const CLASS: u32>(
+    a: T,
+    c: *mut i32,
+) -> T::Result {
+    a.vec_fp_test_data_class::<CLASS>(c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -4946,4 +5036,39 @@ mod tests {
             assert_eq!(d2.as_array(), &[25, 2, 3, 4]);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_fp_test_data_class() {
+        let mut cc = 42;
+
+        let v1 = vector_double([0.0, f64::NAN]);
+        let v2 = vector_double([f64::INFINITY, 1.0]);
+        let v3 = vector_double([1.0, 2.0]);
+
+        unsafe {
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_ZERO>(v1, &mut cc);
+            assert_eq!(cc, 1);
+            assert_eq!(d.as_array(), &[!0, 0]);
+
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NAN>(v1, &mut cc);
+            assert_eq!(cc, 1);
+            assert_eq!(d.as_array(), &[0, !0]);
+
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY>(v2, &mut cc);
+            assert_eq!(cc, 1);
+            assert_eq!(d.as_array(), &[!0, 0]);
+
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY_N>(v2, &mut cc);
+            assert_eq!(cc, 3);
+            assert_eq!(d.as_array(), &[0, 0]);
+
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v2, &mut cc);
+            assert_eq!(cc, 1);
+            assert_eq!(d.as_array(), &[0, !0]);
+
+            let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v3, &mut cc);
+            assert_eq!(cc, 0);
+            assert_eq!(d.as_array(), &[!0, !0]);
+        }
+    }
 }

From 900615014960b315a2fe23ab3c5a28841cdbdeb6 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 14:23:15 +0100
Subject: [PATCH 33/57] add `vec_test_mask`

---
 crates/core_arch/src/s390x/vector.rs | 71 ++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 7db2015964..75d4337df0 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -211,6 +211,8 @@ unsafe extern "unadjusted" {
 
     #[link_name = "llvm.s390.vftcisb"] fn vftcisb(a: vector_float, b: u32) -> PackedTuple<vector_bool_int, i32>;
     #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> PackedTuple<vector_bool_long_long, i32>;
+
+    #[link_name = "llvm.s390.vtm"] fn vtm(a: i8x16, b: i8x16) -> i32;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2818,6 +2820,44 @@ mod sealed {
             x
         }
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorTestMask {
+        type Mask;
+        unsafe fn vec_test_mask(self, other: Self::Mask) -> i32;
+    }
+
+    macro_rules! impl_vec_test_mask {
+        ($($ty:ident)*) => { // one bare type per repetition; an `$instr $ty` pair matcher would skip every other type
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorTestMask for $ty {
+                    type Mask = t_u!($ty);
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_test_mask(self, other: Self::Mask) -> i32 {
+                        vtm(transmute(self), transmute(other)) // `vtm` is declared on i8x16, so both operands are reinterpreted
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_test_mask! {
+        vector_signed_char
+        vector_signed_short
+        vector_signed_int
+        vector_signed_long_long
+
+        vector_unsigned_char
+        vector_unsigned_short
+        vector_unsigned_int
+        vector_unsigned_long_long
+
+        vector_float
+        vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4051,6 +4091,16 @@ pub unsafe fn vec_fp_test_data_class<T: sealed::VectorFpTestDataClass, const CLA
     a.vec_fp_test_data_class::<CLASS>(c)
 }
 
+/// Vector Test under Mask
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_test_mask<T: sealed::VectorTestMask>(a: T, b: T::Mask) -> i32 {
+    // Returns the VTM condition code: 0 if all bits of `a` selected by mask `b` are zero (or `b` is
+    // zero), 3 if all selected bits are one; 1 for a mix, per the z/Architecture VTM definition.
+    a.vec_test_mask(b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5071,4 +5121,25 @@ mod tests {
             assert_eq!(d.as_array(), &[!0, !0]);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_test_mask() {
+        unsafe {
+            let v = vector_unsigned_long_long([0xFF00FF00FF00FF00; 2]);
+            let m = vector_unsigned_long_long([0x0000FF000000FF00; 2]);
+            assert_eq!(vec_test_mask(v, m), 3);
+
+            let v = vector_unsigned_long_long([u64::MAX; 2]);
+            let m = vector_unsigned_long_long([0; 2]);
+            assert_eq!(vec_test_mask(v, m), 0);
+
+            let v = vector_unsigned_long_long([0; 2]);
+            let m = vector_unsigned_long_long([u64::MAX; 2]);
+            assert_eq!(vec_test_mask(v, m), 0);
+
+            let v = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]);
+            let m = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]);
+            assert_eq!(vec_test_mask(v, m), 3);
+        }
+    }
 }

From 7914100b7c55d9861f12993404d77faff29e5657 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 15:07:59 +0100
Subject: [PATCH 34/57] add `vec_search_string_cc` and
 `vec_search_string_until_zero_cc`

---
 crates/core_arch/src/s390x/vector.rs | 159 +++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 75d4337df0..65f36bf4d0 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -213,6 +213,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> PackedTuple<vector_bool_long_long, i32>;
 
     #[link_name = "llvm.s390.vtm"] fn vtm(a: i8x16, b: i8x16) -> i32;
+
+    #[link_name = "llvm.s390.vstrsb"] fn vstrsb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vstrsh"] fn vstrsh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vstrsf"] fn vstrsf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+
+    #[link_name = "llvm.s390.vstrszb"] fn vstrszb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vstrszh"] fn vstrszh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vstrszf"] fn vstrszf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -2858,6 +2866,63 @@ mod sealed {
         vector_float
         vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorSearchString {
+        unsafe fn vec_search_string_cc(
+            self,
+            b: Self,
+            c: vector_unsigned_char,
+            d: &mut i32,
+        ) -> vector_unsigned_char;
+
+        unsafe fn vec_search_string_until_zero_cc(
+            self,
+            b: Self,
+            c: vector_unsigned_char,
+            d: &mut i32,
+        ) -> vector_unsigned_char;
+    }
+
+    macro_rules! impl_vec_search_string {
+        ($($intr_s:ident $intr_sz:ident $ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorSearchString for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector-enhancements-2")] // the vstrs* intrinsics need this facility
+                    unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char, d: &mut i32) -> vector_unsigned_char {
+                        let PackedTuple { x, y } = $intr_s(transmute(self), transmute(b), c);
+                        *d = y; // second tuple field: the i32 the intrinsic returns next to the vector
+                        x
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector-enhancements-2")] // the vstrsz* intrinsics need this facility
+                    unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char, d: &mut i32) -> vector_unsigned_char {
+                        let PackedTuple { x, y } = $intr_sz(transmute(self), transmute(b), c);
+                        *d = y; // second tuple field: the i32 the intrinsic returns next to the vector
+                        x
+                    }
+                }
+
+            )*
+        }
+    }
+
+    impl_vec_search_string! {
+        vstrsb vstrszb vector_signed_char
+        vstrsb vstrszb vector_bool_char
+        vstrsb vstrszb vector_unsigned_char
+
+        vstrsh vstrszh vector_signed_short
+        vstrsh vstrszh vector_bool_short
+        vstrsh vstrszh vector_unsigned_short
+
+        vstrsf vstrszf vector_signed_int
+        vstrsf vstrszf vector_bool_int
+        vstrsf vstrszf vector_unsigned_int
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4101,6 +4166,32 @@ pub unsafe fn vec_test_mask<T: sealed::VectorTestMask>(a: T, b: T::Mask) -> i32
     a.vec_test_mask(b)
 }
 
+/// Vector Search String: searches `a` for the substring `b`; the `i32` the instruction returns is stored in `d`
+#[inline]
+#[target_feature(enable = "vector-enhancements-2")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_search_string_cc<T: sealed::VectorSearchString>(
+    a: T,
+    b: T,
+    c: vector_unsigned_char,
+    d: &mut i32,
+) -> vector_unsigned_char {
+    a.vec_search_string_cc(b, c, d)
+}
+
+/// Vector Search String Until Zero: like `vec_search_string_cc`, but uses the zero-terminated intrinsic variant
+#[inline]
+#[target_feature(enable = "vector-enhancements-2")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>(
+    a: T,
+    b: T,
+    c: vector_unsigned_char,
+    d: &mut i32,
+) -> vector_unsigned_char {
+    a.vec_search_string_until_zero_cc(b, c, d)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5142,4 +5233,72 @@ mod tests {
             assert_eq!(vec_test_mask(v, m), 3);
         }
     }
+
+    #[simd_test(enable = "vector-enhancements-2")]
+    fn test_vec_search_string_cc() {
+        unsafe {
+            let b = vector_unsigned_char(*b"ABCD------------");
+            let c = vector_unsigned_char([4; 16]);
+            let mut d = 0i32;
+
+            let haystack = vector_unsigned_char(*b"__ABCD__________");
+            let result = vec_search_string_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 2);
+            assert_eq!(d, 2);
+
+            let haystack = vector_unsigned_char(*b"___ABCD_________");
+            let result = vec_search_string_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 3);
+            assert_eq!(d, 2);
+
+            let haystack = vector_unsigned_char(*b"________________");
+            let result = vec_search_string_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 16);
+            assert_eq!(d, 0);
+
+            let haystack = vector_unsigned_char(*b"______\0_________");
+            let result = vec_search_string_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 16);
+            assert_eq!(d, 0);
+
+            let haystack = vector_unsigned_char(*b"______\0__ABCD___");
+            let result = vec_search_string_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 9);
+            assert_eq!(d, 2);
+        }
+    }
+
+    #[simd_test(enable = "vector-enhancements-2")]
+    fn test_vec_search_string_until_zero_cc() {
+        unsafe {
+            let b = vector_unsigned_char(*b"ABCD\0\0\0\0\0\0\0\0\0\0\0\0");
+            let c = vector_unsigned_char([16; 16]);
+            let mut d = 0i32;
+
+            let haystack = vector_unsigned_char(*b"__ABCD__________");
+            let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 2);
+            assert_eq!(d, 2);
+
+            let haystack = vector_unsigned_char(*b"___ABCD_________");
+            let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 3);
+            assert_eq!(d, 2);
+
+            let haystack = vector_unsigned_char(*b"________________");
+            let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 16);
+            assert_eq!(d, 0);
+
+            let haystack = vector_unsigned_char(*b"______\0_________");
+            let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 16);
+            assert_eq!(d, 1);
+
+            let haystack = vector_unsigned_char(*b"______\0__ABCD___");
+            let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d);
+            assert_eq!(result.as_array()[7], 16);
+            assert_eq!(d, 1);
+        }
+    }
 }

From a48b50419d7d0e8baa74763ce518d32283cb2d32 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 16:34:03 +0100
Subject: [PATCH 35/57] add `vec_double` and `vec_float`

---
 crates/core_arch/src/s390x/vector.rs | 158 +++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 65f36bf4d0..8a63b2ad52 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2923,6 +2923,86 @@ mod sealed {
         vstrsf vstrszf vector_bool_int
         vstrsf vstrszf vector_unsigned_int
     }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vcdgb))]
+    pub unsafe fn vcdgb(a: vector_signed_long_long) -> vector_double {
+        simd_as(a)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(test, assert_instr(vcdlgb))]
+    pub unsafe fn vcdlgb(a: vector_unsigned_long_long) -> vector_double {
+        simd_as(a)
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorDouble {
+        unsafe fn vec_double(self) -> vector_double;
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorDouble for vector_signed_long_long {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_double(self) -> vector_double {
+            vcdgb(self)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorDouble for vector_unsigned_long_long {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_double(self) -> vector_double {
+            vcdlgb(self)
+        }
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vcefb)
+    )]
+    pub unsafe fn vcefb(a: vector_signed_int) -> vector_float {
+        simd_as(a)
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    #[cfg_attr(
+        all(test, target_feature = "vector-enhancements-2"),
+        assert_instr(vcelfb)
+    )]
+    pub unsafe fn vcelfb(a: vector_unsigned_int) -> vector_float {
+        simd_as(a)
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorFloat {
+        unsafe fn vec_float(self) -> vector_float;
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorFloat for vector_signed_int {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_float(self) -> vector_float {
+            vcefb(self)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorFloat for vector_unsigned_int {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_float(self) -> vector_float {
+            vcelfb(self)
+        }
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4192,6 +4272,48 @@ pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>(
     a.vec_search_string_until_zero_cc(b, c, d)
 }
 
+/// Vector Convert from float (even elements) to double
+#[inline]
+#[target_feature(enable = "vector-enhancements-1")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+// FIXME: this emits `vflls` where `vldeb` is expected
+// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))]
+pub unsafe fn vec_doublee(a: vector_float) -> vector_double {
+    let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) });
+    simd_as(even)
+}
+
+/// Vector Convert from double to float (even elements)
+#[inline]
+#[target_feature(enable = "vector-enhancements-1")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+// FIXME: the C version uses a shuffle mask with poison; we can't do that
+// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))]
+pub unsafe fn vec_floate(a: vector_double) -> vector_float {
+    let truncated: f32x2 = simd_as(a);
+    simd_shuffle(
+        truncated,
+        truncated,
+        const { u32x4::from_array([0, 0, 1, 1]) },
+    )
+}
+
+/// Vector Convert from int to float
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_float(a: impl sealed::VectorFloat) -> vector_float {
+    a.vec_float()
+}
+
+/// Vector Convert from long long to double
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_double(a: impl sealed::VectorDouble) -> vector_double {
+    a.vec_double()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5301,4 +5423,40 @@ mod tests {
             assert_eq!(d, 1);
         }
     }
+
+    #[simd_test(enable = "vector-enhancements-1")] // same feature as `vec_doublee`
+    fn test_vec_doublee() {
+        unsafe {
+            let v = vector_float([1.0, 2.0, 3.0, 4.0]);
+            assert_eq!(vec_doublee(v).as_array(), &[1.0, 3.0]);
+            // Special values (NaN, infinity) in the even lanes must survive the widening.
+            let v = vector_float([f32::NAN, 2.0, f32::INFINITY, 4.0]);
+            let d = vec_doublee(v);
+            assert!(d.as_array()[0].is_nan());
+            assert_eq!(d.as_array()[1], f64::INFINITY);
+        }
+    }
+
+    #[simd_test(enable = "vector-enhancements-1")] // same feature as `vec_floate`
+    fn test_vec_floate() {
+        // NOTE: indices 1 and 3 can have an arbitrary value. With the C version
+        // these are poison values; our version initializes the memory, but its
+        // value still should not be relied upon by application code.
+        unsafe {
+            let v = vector_double([1.0, 2.0]);
+            let d = vec_floate(v);
+            assert_eq!(d.as_array()[0], 1.0);
+            assert_eq!(d.as_array()[2], 2.0);
+
+            let v = vector_double([f64::NAN, f64::INFINITY]);
+            let d = vec_floate(v);
+            assert!(d.as_array()[0].is_nan());
+            assert_eq!(d.as_array()[2], f32::INFINITY);
+
+            let v = vector_double([f64::MIN, f64::MAX]);
+            let d = vec_floate(v);
+            assert_eq!(d.as_array()[0], f64::MIN as f32);
+            assert_eq!(d.as_array()[2], f64::MAX as f32);
+        }
+    }
 }

From 814f140fb4cc70dbe85cc7c36cdc59fc7c080639 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 18:42:56 +0100
Subject: [PATCH 36/57] add `vec_extend_s64`

---
 crates/core_arch/src/s390x/vector.rs | 88 ++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 8a63b2ad52..918cf0a8b1 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3003,6 +3003,72 @@ mod sealed {
             vcelfb(self)
         }
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorExtendSigned64 {
+        unsafe fn vec_extend_s64(self) -> vector_signed_long_long;
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
+    // #[cfg_attr(test, assert_instr(vsegb))]
+    pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long {
+        simd_as(simd_shuffle::<_, _, i8x2>(
+            a,
+            a,
+            const { u32x2::from_array([7, 15]) },
+        ))
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
+    // #[cfg_attr(test, assert_instr(vsegh))]
+    pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long {
+        simd_as(simd_shuffle::<_, _, i16x2>(
+            a,
+            a,
+            const { u32x2::from_array([3, 7]) },
+        ))
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
+    // #[cfg_attr(test, assert_instr(vsegf))]
+    pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long {
+        simd_as(simd_shuffle::<_, _, i32x2>(
+            a,
+            a,
+            const { u32x2::from_array([1, 3]) },
+        ))
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorExtendSigned64 for vector_signed_char {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_extend_s64(self) -> vector_signed_long_long {
+            vsegb(self)
+        }
+    }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorExtendSigned64 for vector_signed_short {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_extend_s64(self) -> vector_signed_long_long {
+            vsegh(self)
+        }
+    }
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorExtendSigned64 for vector_signed_int {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_extend_s64(self) -> vector_signed_long_long {
+            vsegf(self)
+        }
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4314,6 +4380,14 @@ pub unsafe fn vec_double(a: impl sealed::VectorDouble) -> vector_double {
     a.vec_double()
 }
 
+/// Vector Sign Extend to Doubleword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_extend_s64(a: impl sealed::VectorExtendSigned64) -> vector_signed_long_long {
+    a.vec_extend_s64()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5459,4 +5533,18 @@ mod tests {
             assert_eq!(d.as_array()[2], f64::MAX as f32);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_extend_s64() {
+        unsafe {
+            let v = vector_signed_char([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+            assert_eq!(vec_extend_s64(v).as_array(), &[7, 15]);
+
+            let v = vector_signed_short([0, 1, 2, 3, 4, 5, 6, 7]);
+            assert_eq!(vec_extend_s64(v).as_array(), &[3, 7]);
+
+            let v = vector_signed_int([0, 1, 2, 3]);
+            assert_eq!(vec_extend_s64(v).as_array(), &[1, 3]);
+        }
+    }
 }

From cd185a8609511a90d34a2045f78352ba2614f1b8 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 19:13:48 +0100
Subject: [PATCH 37/57] add `vec_signed` and `vec_unsigned`

---
 crates/core_arch/src/s390x/vector.rs | 68 ++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 918cf0a8b1..2b9f94017e 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3069,6 +3069,35 @@ mod sealed {
             vsegf(self)
         }
     }
+
+    // NOTE: VectorSigned and VectorUnsigned make strong safety assumptions around floats.
+    // This is what C provides, but even IBM does not clearly document these constraints.
+    //
+    // https://doc.rust-lang.org/std/intrinsics/simd/fn.simd_cast.html
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorSigned {
+        type Result;
+        unsafe fn vec_signed(self) -> Self::Result;
+    }
+
+    test_impl! { vcgsb (a: vector_float) -> vector_signed_int [simd_cast, "vector-enhancements-2" vcgsb] }
+    test_impl! { vcgdb (a: vector_double) -> vector_signed_long_long [simd_cast, vcgdb] }
+
+    impl_vec_trait! { [VectorSigned vec_signed] vcgsb (vector_float) -> vector_signed_int }
+    impl_vec_trait! { [VectorSigned vec_signed] vcgdb (vector_double) -> vector_signed_long_long }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorUnsigned {
+        type Result;
+        unsafe fn vec_unsigned(self) -> Self::Result;
+    }
+
+    test_impl! { vclgsb (a: vector_float) -> vector_unsigned_int [simd_cast, "vector-enhancements-2" vclgsb] }
+    test_impl! { vclgdb (a: vector_double) -> vector_unsigned_long_long [simd_cast, vclgdb] }
+
+    impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgsb (vector_float) -> vector_unsigned_int }
+    impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgdb (vector_double) -> vector_unsigned_long_long }
 }
 
 /// Load Count to Block Boundary
@@ -4388,6 +4417,22 @@ pub unsafe fn vec_extend_s64(a: impl sealed::VectorExtendSigned64) -> vector_sig
     a.vec_extend_s64()
 }
 
+/// Vector Convert floating point to signed
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_signed<T: sealed::VectorSigned>(a: T) -> T::Result {
+    a.vec_signed()
+}
+
+/// Vector Convert floating point to unsigned
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unsigned<T: sealed::VectorUnsigned>(a: T) -> T::Result {
+    a.vec_unsigned()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5547,4 +5592,27 @@ mod tests {
             assert_eq!(vec_extend_s64(v).as_array(), &[1, 3]);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_signed() {
+        unsafe {
+            let v = vector_float([1.0, 2.5, -2.5, -0.0]);
+            assert_eq!(vec_signed(v).as_array(), &[1, 2, -2, 0]);
+
+            let v = vector_double([2.5, -2.5]);
+            assert_eq!(vec_signed(v).as_array(), &[2, -2]);
+        }
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_unsigned() {
+        // NOTE: converting a negative floating point value is UB!
+        unsafe {
+            let v = vector_float([1.0, 2.5, 3.5, 0.0]);
+            assert_eq!(vec_unsigned(v).as_array(), &[1, 2, 3, 0]);
+
+            let v = vector_double([2.5, 3.5]);
+            assert_eq!(vec_unsigned(v).as_array(), &[2, 3]);
+        }
+    }
 }

From 900d502e3be4aa327f450b5a9195595cbab80464 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 21:54:57 +0100
Subject: [PATCH 38/57] add `vec_cp_until_zero` and `vec_cp_until_zero_cc`

---
 crates/core_arch/src/s390x/vector.rs | 115 +++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 2b9f94017e..69fabbbe63 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -221,6 +221,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vstrszb"] fn vstrszb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
     #[link_name = "llvm.s390.vstrszh"] fn vstrszh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
     #[link_name = "llvm.s390.vstrszf"] fn vstrszf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+
+    #[link_name = "llvm.s390.vistrb"] fn vistrb(a: vector_unsigned_char) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vistrh"] fn vistrh(a: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vistrf"] fn vistrf(a: vector_unsigned_int) -> vector_unsigned_int;
+
+    #[link_name = "llvm.s390.vistrbs"] fn vistrbs(a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+    #[link_name = "llvm.s390.vistrhs"] fn vistrhs(a: vector_unsigned_short) -> PackedTuple<vector_unsigned_short, i32>;
+    #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3098,6 +3106,68 @@ mod sealed {
 
     impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgsb (vector_float) -> vector_unsigned_int }
     impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgdb (vector_double) -> vector_unsigned_long_long }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorCopyUntilZero {
+        unsafe fn vec_cp_until_zero(self) -> Self;
+    }
+
+    test_impl! { vec_vistrb (a: vector_unsigned_char) -> vector_unsigned_char [vistrb, vistrb] }
+    test_impl! { vec_vistrh (a: vector_unsigned_short) -> vector_unsigned_short [vistrh, vistrh] }
+    test_impl! { vec_vistrf (a: vector_unsigned_int) -> vector_unsigned_int [vistrf, vistrf] }
+
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_signed_char) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_bool_char) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_unsigned_char) }
+
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_signed_short) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_bool_short) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_unsigned_short) }
+
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_signed_int) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_bool_int) }
+    impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_unsigned_int) }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorCopyUntilZeroCC {
+        unsafe fn vec_cp_until_zero_cc(self, cc: *mut i32) -> Self;
+    }
+
+    test_impl! { vec_vistrbs (a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32> [vistrbs, vistrbs] }
+    test_impl! { vec_vistrhs (a: vector_unsigned_short) -> PackedTuple<vector_unsigned_short, i32> [vistrhs, vistrhs] }
+    test_impl! { vec_vistrfs (a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32> [vistrfs, vistrfs] }
+
+    macro_rules! impl_vec_copy_until_zero_cc {
+        ($($intr:ident $ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorCopyUntilZeroCC for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cp_until_zero_cc(self, cc: *mut i32) -> Self {
+                        let PackedTuple { x,y } = $intr(transmute(self));
+                        cc.write(y);
+                        transmute(x)
+                    }
+                }
+
+            )*
+        }
+    }
+
+    impl_vec_copy_until_zero_cc! {
+        vec_vistrbs vector_signed_char
+        vec_vistrbs vector_bool_char
+        vec_vistrbs vector_unsigned_char
+
+        vec_vistrhs vector_signed_short
+        vec_vistrhs vector_bool_short
+        vec_vistrhs vector_unsigned_short
+
+        vec_vistrfs vector_signed_int
+        vec_vistrfs vector_bool_int
+        vec_vistrfs vector_unsigned_int
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4433,6 +4503,22 @@ pub unsafe fn vec_unsigned<T: sealed::VectorUnsigned>(a: T) -> T::Result {
     a.vec_unsigned()
 }
 
+/// Vector Copy Until Zero
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cp_until_zero<T: sealed::VectorCopyUntilZero>(a: T) -> T {
+    a.vec_cp_until_zero()
+}
+
+/// Vector Copy Until Zero, also writing the resulting condition code through `cc`
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cp_until_zero_cc<T: sealed::VectorCopyUntilZeroCC>(a: T, cc: *mut i32) -> T {
+    a.vec_cp_until_zero_cc(cc)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5615,4 +5701,33 @@ mod tests {
             assert_eq!(vec_unsigned(v).as_array(), &[2, 3]);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cp_until_zero() {
+        unsafe {
+            let v = vector_signed_int([1, 2, 3, 4]);
+            let d = vec_cp_until_zero(v);
+            assert_eq!(d.as_array(), &[1, 2, 3, 4]);
+
+            let v = vector_signed_int([1, 2, 0, 4]);
+            let d = vec_cp_until_zero(v);
+            assert_eq!(d.as_array(), &[1, 2, 0, 0]);
+        }
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cp_until_zero_cc() {
+        let mut cc = 0;
+        unsafe {
+            let v = vector_signed_int([1, 2, 3, 4]);
+            let d = vec_cp_until_zero_cc(v, &mut cc);
+            assert_eq!(d.as_array(), &[1, 2, 3, 4]);
+            assert_eq!(cc, 3);
+
+            let v = vector_signed_int([1, 2, 0, 4]);
+            let d = vec_cp_until_zero_cc(v, &mut cc);
+            assert_eq!(d.as_array(), &[1, 2, 0, 0]);
+            assert_eq!(cc, 0);
+        }
+    }
 }

From 01c208f1f4ba44a821ab968fb038812081a43727 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 5 Mar 2025 22:14:45 +0100
Subject: [PATCH 39/57] add `vec_msum_u128`

---
 crates/core_arch/src/s390x/vector.rs | 45 ++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 69fabbbe63..c30d35f16a 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -229,6 +229,8 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vistrbs"] fn vistrbs(a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
     #[link_name = "llvm.s390.vistrhs"] fn vistrhs(a: vector_unsigned_short) -> PackedTuple<vector_unsigned_short, i32>;
     #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32>;
+
+    #[link_name = "llvm.s390.vmslg"] fn vmslg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128, d: u32) -> u128;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -4519,6 +4521,27 @@ pub unsafe fn vec_cp_until_zero_cc<T: sealed::VectorCopyUntilZeroCC>(a: T, cc: *
     a.vec_cp_until_zero_cc(cc)
 }
 
+/// Vector Multiply Sum Logical
+#[inline]
+#[target_feature(enable = "vector-enhancements-1")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(
+    all(test, target_feature = "vector-enhancements-1"),
+    assert_instr(vmslg, D = 4)
+)]
+pub unsafe fn vec_msum_u128<const D: u32>(
+    a: vector_unsigned_long_long,
+    b: vector_unsigned_long_long,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    const {
+        if !matches!(D, 0 | 4 | 8 | 12) {
+            panic!("D needs to be one of 0, 4, 8, 12");
+        }
+    };
+    transmute(vmslg(a, b, transmute(c), D))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5730,4 +5753,26 @@ mod tests {
             assert_eq!(cc, 0);
         }
     }
+
+    #[simd_test(enable = "vector-enhancements-1")]
+    fn test_vec_msum_u128() {
+        let a = vector_unsigned_long_long([1, 2]);
+        let b = vector_unsigned_long_long([3, 4]);
+
+        unsafe {
+            let c: vector_unsigned_char = transmute(100u128);
+
+            let d: u128 = transmute(vec_msum_u128::<0>(a, b, c));
+            assert_eq!(d, (1 * 3) + (2 * 4) + 100);
+
+            let d: u128 = transmute(vec_msum_u128::<4>(a, b, c));
+            assert_eq!(d, (1 * 3) + (2 * 4) * 2 + 100);
+
+            let d: u128 = transmute(vec_msum_u128::<8>(a, b, c));
+            assert_eq!(d, (1 * 3) * 2 + (2 * 4) + 100);
+
+            let d: u128 = transmute(vec_msum_u128::<12>(a, b, c));
+            assert_eq!(d, (1 * 3) * 2 + (2 * 4) * 2 + 100);
+        }
+    }
 }

From bd478a9457026ab160e95cc0b0bd469d2b7ef1f1 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 00:40:06 +0100
Subject: [PATCH 40/57] add `vec_sld`, `vec_sldb`, `vec_sldw` and `vec_srdb`

---
 crates/core_arch/src/s390x/vector.rs | 165 +++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index c30d35f16a..04cb951316 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -97,6 +97,10 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
     #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
 
+    #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16;
+    #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
+    #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;
+
     #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
     #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
     #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
@@ -3170,6 +3174,86 @@ mod sealed {
         vec_vistrfs vector_bool_int
         vec_vistrfs vector_unsigned_int
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorSrdb {
+        unsafe fn vec_srdb<const C: u32>(self, b: Self) -> Self;
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorSld {
+        unsafe fn vec_sld<const C: u32>(self, b: Self) -> Self;
+
+        unsafe fn vec_sldw<const C: u32>(self, b: Self) -> Self;
+
+        unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self;
+    }
+
+    // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955
+    // ideally we could implement this in terms of llvm.fshl.i128
+    // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128;
+    // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } ))
+
+    macro_rules! impl_vec_sld {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorSld for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_sld<const C: u32>(self, b: Self) -> Self {
+                        static_assert_uimm_bits!(C, 4);
+                        transmute(vsldb(transmute(self), transmute(b), C))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_sldw<const C: u32>(self, b: Self) -> Self {
+                        static_assert_uimm_bits!(C, 2);
+                        transmute(vsldb(transmute(self), transmute(b), const { 4 * C }))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector-enhancements-2")]
+                    unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self {
+                        static_assert_uimm_bits!(C, 3);
+                        transmute(vsld(transmute(self), transmute(b), C))
+                    }
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorSrdb for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector-enhancements-2")]
+                    unsafe fn vec_srdb<const C: u32>(self, b: Self) -> Self {
+                        static_assert_uimm_bits!(C, 3);
+                        transmute(vsrd(transmute(self), transmute(b), C))
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_sld! {
+        vector_signed_char
+        vector_bool_char
+        vector_unsigned_char
+
+        vector_signed_short
+        vector_bool_short
+        vector_unsigned_short
+
+        vector_signed_int
+        vector_bool_int
+        vector_unsigned_int
+
+        vector_signed_long_long
+        vector_bool_long_long
+        vector_unsigned_long_long
+
+        vector_float
+        vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4542,6 +4626,42 @@ pub unsafe fn vec_msum_u128<const D: u32>(
     transmute(vmslg(a, b, transmute(c), D))
 }
 
+/// Vector Shift Left Double by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sld<T: sealed::VectorSld, const C: u32>(a: T, b: T) -> T {
+    static_assert_uimm_bits!(C, 4);
+    a.vec_sld::<C>(b)
+}
+
+/// Vector Shift Left Double by Word
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sldw<T: sealed::VectorSld, const C: u32>(a: T, b: T) -> T {
+    static_assert_uimm_bits!(C, 2);
+    a.vec_sldw::<C>(b)
+}
+
+/// Vector Shift Left Double by Bit
+#[inline]
+#[target_feature(enable = "vector-enhancements-2")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sldb<T: sealed::VectorSld, const C: u32>(a: T, b: T) -> T {
+    static_assert_uimm_bits!(C, 3);
+    a.vec_sldb::<C>(b)
+}
+
+/// Vector Shift Right Double by Bit
+#[inline]
+#[target_feature(enable = "vector-enhancements-2")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srdb<T: sealed::VectorSrdb, const C: u32>(a: T, b: T) -> T {
+    static_assert_uimm_bits!(C, 3);
+    a.vec_srdb::<C>(b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5775,4 +5895,49 @@ mod tests {
             assert_eq!(d, (1 * 3) * 2 + (2 * 4) * 2 + 100);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_sld() {
+        let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]);
+        let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
+
+        unsafe {
+            let d = vec_sld::<_, 4>(a, b);
+            assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]);
+        }
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_sldw() {
+        let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]);
+        let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
+
+        unsafe {
+            let d = vec_sldw::<_, 1>(a, b);
+            assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]);
+        }
+    }
+
+    #[simd_test(enable = "vector-enhancements-2")]
+    fn test_vec_sldb() {
+        let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]);
+        let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
+
+        unsafe {
+            let d = vec_sldb::<_, 4>(a, b);
+            assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAB]);
+        }
+    }
+
+    #[simd_test(enable = "vector-enhancements-2")]
+    fn test_vec_srdb() {
+        let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]);
+        let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
+
+        unsafe {
+            let d = vec_srdb::<_, 4>(a, b);
+            println!("{:x?}", &d);
+            assert_eq!(d.as_array(), &[0xABBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
+        }
+    }
 }

From 747870fd45407eaac061dd3e6681b7f7cad05d52 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:27:00 +0100
Subject: [PATCH 41/57] add `vec_cmprg`

---
 crates/core_arch/src/s390x/vector.rs | 105 +++++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 8 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 04cb951316..accae48b29 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -235,6 +235,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32>;
 
     #[link_name = "llvm.s390.vmslg"] fn vmslg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128, d: u32) -> u128;
+
+    #[link_name = "llvm.s390.vstrcb"] fn vstrcb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char;
+    #[link_name = "llvm.s390.vstrch"] fn vstrch(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short;
+    #[link_name = "llvm.s390.vstrcf"] fn vstrcf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+
+    #[link_name = "llvm.s390.vstrcbs"] fn vstrcbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>;
+    #[link_name = "llvm.s390.vstrchs"] fn vstrchs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>;
+    #[link_name = "llvm.s390.vstrcfs"] fn vstrcfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -365,6 +373,13 @@ const fn validate_block_boundary(block_boundary: u16) -> u32 {
     block_boundary as u32 >> 7
 }
 
+enum FindImm {
+    Eq = 4,
+    Ne = 12,
+    EqIdx = 0,
+    NeIdx = 8,
+}
+
 #[macro_use]
 mod sealed {
     use super::*;
@@ -1922,13 +1937,6 @@ mod sealed {
         };
     }
 
-    enum FindImm {
-        Eq = 4,
-        Ne = 12,
-        EqIdx = 0,
-        NeIdx = 8,
-    }
-
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyEq<Other> {
         type Result;
@@ -3254,6 +3262,56 @@ mod sealed {
         vector_float
         vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorCompareRange: Sized {
+        type Result;
+
+        unsafe fn vstrc<const IMM: u32>(self, b: Self, c: Self) -> Self::Result;
+        unsafe fn vstrcs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32);
+    }
+
+    macro_rules! impl_compare_range {
+        ($($ty:ident $vstrc:ident $vstrcs:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorCompareRange for $ty {
+                    type Result = t_b!($ty);
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vstrc<const IMM: u32>(self, b: Self, c: Self) -> Self::Result {
+                        const {
+                            if !matches!(IMM, 0 | 4 | 8 | 12) {
+                                panic!("IMM needs to be one of 0, 4, 8, 12");
+                            }
+                        };
+
+                        $vstrc(self, b, c, IMM)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vstrcs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32) {
+                        const {
+                            if !matches!(IMM, 0 | 4 | 8 | 12) {
+                                panic!("IMM needs to be one of 0, 4, 8, 12");
+                            }
+                        };
+
+                        let PackedTuple { x, y } = $vstrcs(self, b, c, IMM);
+                        (x,y)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_compare_range! {
+        vector_unsigned_char    vstrcb vstrcbs
+        vector_unsigned_short   vstrch vstrchs
+        vector_unsigned_int     vstrcf vstrcfs
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4662,6 +4720,13 @@ pub unsafe fn vec_srdb<T: sealed::VectorSrdb, const C: u32>(a: T, b: T) -> T {
     a.vec_srdb::<C>(b)
 }
 
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrc::<{ FindImm::Eq as u32 }>(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5936,8 +6001,32 @@ mod tests {
 
         unsafe {
             let d = vec_srdb::<_, 4>(a, b);
-            println!("{:x?}", &d);
             assert_eq!(d.as_array(), &[0xABBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]);
         }
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmprg() {
+        const GT: u32 = 0x20000000;
+        const LT: u32 = 0x40000000;
+        const EQ: u32 = 0x80000000;
+
+        let a = vector_unsigned_int([11, 22, 33, 44]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmprg(a, b, c) };
+        assert_eq!(d.as_array(), &[!0, 0, !0, 0]);
+
+        let c = vector_unsigned_int([GT, LT, 0, 0]);
+        let d = unsafe { vec_cmprg(a, b, c) };
+        assert_eq!(d.as_array(), &[!0, 0, 0, 0]);
+
+        let a = vector_unsigned_int([11, 22, 33, 30]);
+        let b = vector_unsigned_int([10, 20, 30, 30]);
+
+        let c = vector_unsigned_int([GT, LT, EQ, EQ]);
+        let d = unsafe { vec_cmprg(a, b, c) };
+        assert_eq!(d.as_array(), &[!0, 0, 0, !0]);
+    }
 }

From 6a03b2fae4e71959405f50f80612f6eb47bd9660 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:32:16 +0100
Subject: [PATCH 42/57] add `vec_cmpnrg`

---
 crates/core_arch/src/s390x/vector.rs | 38 +++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index accae48b29..72514c34fb 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -4720,6 +4720,7 @@ pub unsafe fn vec_srdb<T: sealed::VectorSrdb, const C: u32>(a: T, b: T) -> T {
     a.vec_srdb::<C>(b)
 }
 
+/// Vector Compare Ranges
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -4727,6 +4728,14 @@ pub unsafe fn vec_cmprg<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::R
     a.vstrc::<{ FindImm::Eq as u32 }>(b, c)
 }
 
+/// Vector Compare Not in Ranges
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrc::<{ FindImm::Ne as u32 }>(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -6005,12 +6014,12 @@ mod tests {
         }
     }
 
+    const GT: u32 = 0x20000000;
+    const LT: u32 = 0x40000000;
+    const EQ: u32 = 0x80000000;
+
     #[simd_test(enable = "vector")]
     fn test_vec_cmprg() {
-        const GT: u32 = 0x20000000;
-        const LT: u32 = 0x40000000;
-        const EQ: u32 = 0x80000000;
-
         let a = vector_unsigned_int([11, 22, 33, 44]);
         let b = vector_unsigned_int([10, 20, 30, 40]);
 
@@ -6029,4 +6038,25 @@ mod tests {
         let d = unsafe { vec_cmprg(a, b, c) };
         assert_eq!(d.as_array(), &[!0, 0, 0, !0]);
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmpnrg() {
+        let a = vector_unsigned_int([11, 22, 33, 44]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmpnrg(a, b, c) };
+        assert_eq!(d.as_array(), &[0, !0, 0, !0]);
+
+        let c = vector_unsigned_int([GT, LT, 0, 0]);
+        let d = unsafe { vec_cmpnrg(a, b, c) };
+        assert_eq!(d.as_array(), &[0, !0, !0, !0]);
+
+        let a = vector_unsigned_int([11, 22, 33, 30]);
+        let b = vector_unsigned_int([10, 20, 30, 30]);
+
+        let c = vector_unsigned_int([GT, LT, EQ, EQ]);
+        let d = unsafe { vec_cmpnrg(a, b, c) };
+        assert_eq!(d.as_array(), &[0, !0, !0, 0]);
+    }
 }

From 434d285c3e5c3e08f238536bf940abbdac06ddcf Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:35:33 +0100
Subject: [PATCH 43/57] add `vec_cmprg_idx` and `vec_cmpnrg_idx`

---
 crates/core_arch/src/s390x/vector.rs | 36 ++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 72514c34fb..e4de4d107d 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -4736,6 +4736,22 @@ pub unsafe fn vec_cmpnrg<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::
     a.vstrc::<{ FindImm::Ne as u32 }>(b, c)
 }
 
+/// Vector Compare Ranges Index
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrc::<{ FindImm::EqIdx as u32 }>(b, c)
+}
+
+/// Vector Compare Not in Ranges Index
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrc::<{ FindImm::NeIdx as u32 }>(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -6059,4 +6075,24 @@ mod tests {
         let d = unsafe { vec_cmpnrg(a, b, c) };
         assert_eq!(d.as_array(), &[0, !0, !0, 0]);
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmprg_idx() {
+        let a = vector_unsigned_int([1, 11, 22, 33]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmprg_idx(a, b, c) };
+        assert_eq!(d.as_array(), &[0, 4, 0, 0]);
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmpnrg_idx() {
+        let a = vector_unsigned_int([1, 11, 22, 33]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmpnrg_idx(a, b, c) };
+        assert_eq!(d.as_array(), &[0, 0, 0, 0]);
+    }
 }

From 66b2a463e6bab3811e577b9e79e412973edd05e6 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:39:16 +0100
Subject: [PATCH 44/57] add `vec_cmprg_cc` and friends

---
 crates/core_arch/src/s390x/vector.rs | 40 ++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index e4de4d107d..dcd1aa6947 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -4752,6 +4752,46 @@ pub unsafe fn vec_cmpnrg_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) ->
     a.vstrc::<{ FindImm::NeIdx as u32 }>(b, c)
 }
 
+/// Vector Compare Ranges with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
+    let (x,y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
+/// Vector Compare Not in Ranges with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
+    let (x,y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
+/// Vector Compare Ranges Index with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg_idx_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
+    let (x,y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
+/// Vector Compare Not in Ranges Index with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg_idx_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
+    let (x,y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 4dbc8afdfe7d3693960840098371a90e05048082 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:48:18 +0100
Subject: [PATCH 45/57] add `vec_cmprg_or_0_idx` and `vec_cmpnrg_or_0_idx`

---
 crates/core_arch/src/s390x/vector.rs | 124 +++++++++++++++++++++------
 1 file changed, 100 insertions(+), 24 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index dcd1aa6947..54bfc98af5 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -243,6 +243,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vstrcbs"] fn vstrcbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>;
     #[link_name = "llvm.s390.vstrchs"] fn vstrchs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>;
     #[link_name = "llvm.s390.vstrcfs"] fn vstrcfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+
+    #[link_name = "llvm.s390.vstrczb"] fn vstrczb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char;
+    #[link_name = "llvm.s390.vstrczh"] fn vstrczh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short;
+    #[link_name = "llvm.s390.vstrczf"] fn vstrczf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+
+    #[link_name = "llvm.s390.vstrczbs"] fn vstrczbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>;
+    #[link_name = "llvm.s390.vstrczhs"] fn vstrczhs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>;
+    #[link_name = "llvm.s390.vstrczfs"] fn vstrczfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3268,11 +3276,19 @@ mod sealed {
         type Result;
 
         unsafe fn vstrc<const IMM: u32>(self, b: Self, c: Self) -> Self::Result;
+        unsafe fn vstrcz<const IMM: u32>(self, b: Self, c: Self) -> Self::Result;
         unsafe fn vstrcs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32);
+        unsafe fn vstrczs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32);
+    }
+
+    const fn validate_compare_range_imm(imm: u32) {
+        if !matches!(imm, 0 | 4 | 8 | 12) {
+            panic!("IMM needs to be one of 0, 4, 8, 12");
+        }
     }
 
     macro_rules! impl_compare_range {
-        ($($ty:ident $vstrc:ident $vstrcs:ident)*) => {
+        ($($ty:ident $vstrc:ident $vstrcs:ident $vstrcz:ident $vstrczs:ident)*) => {
             $(
                 #[unstable(feature = "stdarch_s390x", issue = "135681")]
                 impl VectorCompareRange for $ty {
@@ -3281,36 +3297,41 @@ mod sealed {
                     #[inline]
                     #[target_feature(enable = "vector")]
                     unsafe fn vstrc<const IMM: u32>(self, b: Self, c: Self) -> Self::Result {
-                        const {
-                            if !matches!(IMM, 0 | 4 | 8 | 12) {
-                                panic!("IMM needs to be one of 0, 4, 8, 12");
-                            }
-                        };
-
+                        const { validate_compare_range_imm(IMM) };
                         $vstrc(self, b, c, IMM)
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vstrcs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32) {
-                        const {
-                            if !matches!(IMM, 0 | 4 | 8 | 12) {
-                                panic!("IMM needs to be one of 0, 4, 8, 12");
-                            }
-                        };
+                    unsafe fn vstrcz<const IMM: u32>(self, b: Self, c: Self) -> Self::Result {
+                        const { validate_compare_range_imm(IMM) };
+                        $vstrcz(self, b, c, IMM)
+                    }
 
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vstrcs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32) {
+                        const { validate_compare_range_imm(IMM) };
                         let PackedTuple { x, y } = $vstrcs(self, b, c, IMM);
                         (x,y)
                     }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vstrczs<const IMM: u32>(self, b: Self, c: Self) -> (Self::Result, i32) {
+                        const { validate_compare_range_imm(IMM) };
+                        let PackedTuple { x, y } = $vstrczs(self, b, c, IMM);
+                        (x,y)
+                    }
                 }
             )*
         }
     }
 
     impl_compare_range! {
-        vector_unsigned_char    vstrcb vstrcbs
-        vector_unsigned_short   vstrch vstrchs
-        vector_unsigned_int     vstrcf vstrcfs
+        vector_unsigned_char    vstrcb vstrcbs vstrczb vstrczbs
+        vector_unsigned_short   vstrch vstrchs vstrczh vstrczhs
+        vector_unsigned_int     vstrcf vstrcfs vstrczf vstrczfs
     }
 }
 
@@ -4756,8 +4777,13 @@ pub unsafe fn vec_cmpnrg_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) ->
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cmprg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
-    let (x,y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c);
+pub unsafe fn vec_cmprg_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c);
     d.write(y);
     x
 }
@@ -4766,8 +4792,13 @@ pub unsafe fn vec_cmprg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cmpnrg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
-    let (x,y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c);
+pub unsafe fn vec_cmpnrg_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c);
     d.write(y);
     x
 }
@@ -4776,8 +4807,13 @@ pub unsafe fn vec_cmpnrg_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d:
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cmprg_idx_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
-    let (x,y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c);
+pub unsafe fn vec_cmprg_idx_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c);
     d.write(y);
     x
 }
@@ -4786,12 +4822,33 @@ pub unsafe fn vec_cmprg_idx_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T,
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cmpnrg_idx_cc<T: sealed::VectorCompareRange>(a: T, b: T, c: T, d: *mut i32) -> T::Result {
-    let (x,y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c);
+pub unsafe fn vec_cmpnrg_idx_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c);
     d.write(y);
     x
 }
 
+/// Vector Compare Ranges or Zero Index
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg_or_0_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrcz::<{ FindImm::EqIdx as u32 }>(b, c)
+}
+
+/// Vector Compare Not in Ranges or Zero Index
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg_or_0_idx<T: sealed::VectorCompareRange>(a: T, b: T, c: T) -> T::Result {
+    a.vstrcz::<{ FindImm::NeIdx as u32 }>(b, c)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -6135,4 +6191,24 @@ mod tests {
         let d = unsafe { vec_cmpnrg_idx(a, b, c) };
         assert_eq!(d.as_array(), &[0, 0, 0, 0]);
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmprg_or_0_idx() {
+        let a = vector_unsigned_int([1, 0, 22, 33]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmprg_or_0_idx(a, b, c) };
+        assert_eq!(d.as_array(), &[0, 4, 0, 0]);
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_cmpnrg_or_0_idx() {
+        let a = vector_unsigned_int([11, 33, 0, 22]);
+        let b = vector_unsigned_int([10, 20, 30, 40]);
+
+        let c = vector_unsigned_int([GT, LT, GT, LT]);
+        let d = unsafe { vec_cmpnrg_or_0_idx(a, b, c) };
+        assert_eq!(d.as_array(), &[0, 8, 0, 0]);
+    }
 }

From 83d5d2a7a2f6668ee38322913fc09d941cadce77 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:50:04 +0100
Subject: [PATCH 46/57] add `vec_cmprg_or_0_idx_cc` and
 `vec_cmpnrg_or_0_idx_cc`

---
 crates/core_arch/src/s390x/vector.rs | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 54bfc98af5..a0d60e55d1 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -4848,6 +4848,36 @@ pub unsafe fn vec_cmpnrg_or_0_idx<T: sealed::VectorCompareRange>(a: T, b: T, c:
     a.vstrcz::<{ FindImm::NeIdx as u32 }>(b, c)
 }
 
+/// Vector Compare Ranges or Zero Index with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmprg_or_0_idx_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrczs::<{ FindImm::EqIdx as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
+/// Vector Compare Not in Ranges or Zero Index with Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpnrg_or_0_idx_cc<T: sealed::VectorCompareRange>(
+    a: T,
+    b: T,
+    c: T,
+    d: *mut i32,
+) -> T::Result {
+    let (x, y) = a.vstrczs::<{ FindImm::NeIdx as u32 }>(b, c);
+    d.write(y);
+    x
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From e67cadf12e6c55c6fd8ae2d850c4fd097744c228 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 6 Mar 2025 16:51:22 +0100
Subject: [PATCH 47/57] let's not use `&mut` until we get confirmation it's OK

---
 crates/core_arch/src/s390x/vector.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index a0d60e55d1..c9d29c8d7d 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2903,14 +2903,14 @@ mod sealed {
             self,
             b: Self,
             c: vector_unsigned_char,
-            d: &mut i32,
+            d: *mut i32,
         ) -> vector_unsigned_char;
 
         unsafe fn vec_search_string_until_zero_cc(
             self,
             b: Self,
             c: vector_unsigned_char,
-            d: &mut i32,
+            d: *mut i32,
         ) -> vector_unsigned_char;
     }
 
@@ -2921,17 +2921,17 @@ mod sealed {
                 impl VectorSearchString for $ty {
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char, d: &mut i32) -> vector_unsigned_char {
+                    unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char, d: *mut i32) -> vector_unsigned_char {
                         let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c);
-                        *d = y;
+                        d.write(y);
                         x
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char, d: &mut i32) -> vector_unsigned_char {
+                    unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char, d: *mut i32) -> vector_unsigned_char {
                         let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c);
-                        *d = y;
+                        d.write(y);
                         x
                     }
                 }
@@ -4584,7 +4584,7 @@ pub unsafe fn vec_search_string_cc<T: sealed::VectorSearchString>(
     a: T,
     b: T,
     c: vector_unsigned_char,
-    d: &mut i32,
+    d: *mut i32,
 ) -> vector_unsigned_char {
     a.vec_search_string_cc(b, c, d)
 }
@@ -4597,7 +4597,7 @@ pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>(
     a: T,
     b: T,
     c: vector_unsigned_char,
-    d: &mut i32,
+    d: *mut i32,
 ) -> vector_unsigned_char {
     a.vec_search_string_until_zero_cc(b, c, d)
 }

From c872f5b136a94a5b808da0be0e8eda788bcc1d6b Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Fri, 7 Mar 2025 23:47:58 +0100
Subject: [PATCH 48/57] add `vec_cmpgt`, `vec_cmplt`, `vec_cmpge`, `vec_cmple`

---
 crates/core_arch/src/s390x/vector.rs | 112 +++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index c9d29c8d7d..803ed12b7e 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3333,6 +3333,62 @@ mod sealed {
         vector_unsigned_short   vstrch vstrchs vstrczh vstrczhs
         vector_unsigned_int     vstrcf vstrcfs vstrczf vstrczfs
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorComparePredicate: Sized {
+        type Result;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmpgt(self, other: Self) -> Self::Result {
+            simd_gt(self, other)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmpge(self, other: Self) -> Self::Result {
+            simd_ge(self, other)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmplt(self, other: Self) -> Self::Result {
+            simd_lt(self, other)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmple(self, other: Self) -> Self::Result {
+            simd_le(self, other)
+        }
+    }
+
+    macro_rules! impl_compare_predicate {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorComparePredicate for $ty {
+                    type Result = t_b!($ty);
+                }
+            )*
+        }
+    }
+
+    impl_compare_predicate! {
+        vector_signed_char
+        vector_unsigned_char
+
+        vector_signed_short
+        vector_unsigned_short
+
+        vector_signed_int
+        vector_unsigned_int
+        vector_float
+
+        vector_signed_long_long
+        vector_unsigned_long_long
+        vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4878,6 +4934,38 @@ pub unsafe fn vec_cmpnrg_or_0_idx_cc<T: sealed::VectorCompareRange>(
     x
 }
 
+/// Vector Compare Greater Than
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpgt<T: sealed::VectorComparePredicate>(a: T, b: T) -> T::Result {
+    a.vec_cmpgt(b)
+}
+
+/// Vector Compare Greater Than or Equal
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpge<T: sealed::VectorComparePredicate>(a: T, b: T) -> T::Result {
+    a.vec_cmpge(b)
+}
+
+/// Vector Compare Less Than
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmplt<T: sealed::VectorComparePredicate>(a: T, b: T) -> T::Result {
+    a.vec_cmplt(b)
+}
+
+/// Vector Compare Less Than or Equal
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmple<T: sealed::VectorComparePredicate>(a: T, b: T) -> T::Result {
+    a.vec_cmple(b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -6241,4 +6329,28 @@ mod tests {
         let d = unsafe { vec_cmpnrg_or_0_idx(a, b, c) };
         assert_eq!(d.as_array(), &[0, 8, 0, 0]);
     }
+
+    test_vec_2! { test_vec_cmpgt, vec_cmpgt, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 3.14],
+        [2.0, f32::NAN, 5.0, 2.0],
+        [0, 0, 0, !0]
+    }
+
+    test_vec_2! { test_vec_cmpge, vec_cmpge, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 3.14],
+        [1.0, f32::NAN, 5.0, 2.0],
+        [!0, 0, 0, !0]
+    }
+
+    test_vec_2! { test_vec_cmplt, vec_cmplt, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 2.0],
+        [2.0, f32::NAN, 5.0, 2.0],
+        [!0, 0, 0, 0]
+    }
+
+    test_vec_2! { test_vec_cmple, vec_cmple, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 2.0],
+        [1.0, f32::NAN, 5.0, 3.14],
+        [!0, 0, 0, !0]
+    }
 }

From e8508b58f78d9a5c291ca894bfd39c4564f677cf Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Fri, 7 Mar 2025 23:59:26 +0100
Subject: [PATCH 49/57] add `vec_cmpeq` and `vec_cmpne`

---
 crates/core_arch/src/s390x/vector.rs | 76 ++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 803ed12b7e..ef75124222 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3389,6 +3389,54 @@ mod sealed {
         vector_unsigned_long_long
         vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorEquality: Sized {
+        type Result;
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmpeq(self, other: Self) -> Self::Result {
+            simd_eq(self, other)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_cmpne(self, other: Self) -> Self::Result {
+            simd_ne(self, other)
+        }
+    }
+
+    macro_rules! impl_compare_equality {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorEquality for $ty {
+                    type Result = t_b!($ty);
+                }
+            )*
+        }
+    }
+
+    impl_compare_equality! {
+        vector_bool_char
+        vector_signed_char
+        vector_unsigned_char
+
+        vector_bool_short
+        vector_signed_short
+        vector_unsigned_short
+
+        vector_bool_int
+        vector_signed_int
+        vector_unsigned_int
+        vector_float
+
+        vector_bool_long_long
+        vector_signed_long_long
+        vector_unsigned_long_long
+        vector_double
+    }
 }
 
 /// Load Count to Block Boundary
@@ -4934,6 +4982,22 @@ pub unsafe fn vec_cmpnrg_or_0_idx_cc<T: sealed::VectorCompareRange>(
     x
 }
 
+/// Vector Compare Equal
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpeq<T: sealed::VectorEquality>(a: T, b: T) -> T::Result {
+    a.vec_cmpeq(b)
+}
+
+/// Vector Compare Not Equal
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpne<T: sealed::VectorEquality>(a: T, b: T) -> T::Result {
+    a.vec_cmpne(b)
+}
+
 /// Vector Compare Greater Than
 #[inline]
 #[target_feature(enable = "vector")]
@@ -6353,4 +6417,16 @@ mod tests {
         [1.0, f32::NAN, 5.0, 3.14],
         [!0, 0, 0, !0]
     }
+
+    test_vec_2! { test_vec_cmpeq, vec_cmpeq, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 2.0],
+        [1.0, f32::NAN, 5.0, 3.14],
+        [!0, 0, 0, 0]
+    }
+
+    test_vec_2! { test_vec_cmpne, vec_cmpne, f32x4, f32x4 -> i32x4,
+        [1.0, f32::NAN, f32::NAN, 2.0],
+        [1.0, f32::NAN, 5.0, 3.14],
+        [0, !0, !0, !0]
+    }
 }

From 9e19b0ed41d6ee6731c517994f6bd3f04b59d9a1 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 00:37:30 +0100
Subject: [PATCH 50/57] add `vec_cmpeq_idx` and variations

---
 crates/core_arch/src/s390x/vector.rs | 206 +++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index ef75124222..828b6a0423 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -251,6 +251,38 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vstrczbs"] fn vstrczbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>;
     #[link_name = "llvm.s390.vstrczhs"] fn vstrczhs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>;
     #[link_name = "llvm.s390.vstrczfs"] fn vstrczfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+
+    #[link_name = "llvm.s390.vfeeb"] fn vfeeb(a: i8x16, b: i8x16) -> i8x16;
+    #[link_name = "llvm.s390.vfeeh"] fn vfeeh(a: i16x8, b: i16x8) -> i16x8;
+    #[link_name = "llvm.s390.vfeef"] fn vfeef(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.s390.vfeezb"] fn vfeezb(a: i8x16, b: i8x16) -> i8x16;
+    #[link_name = "llvm.s390.vfeezh"] fn vfeezh(a: i16x8, b: i16x8) -> i16x8;
+    #[link_name = "llvm.s390.vfeezf"] fn vfeezf(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.s390.vfeebs"] fn vfeebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
+    #[link_name = "llvm.s390.vfeehs"] fn vfeehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
+    #[link_name = "llvm.s390.vfeefs"] fn vfeefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+
+    #[link_name = "llvm.s390.vfeezbs"] fn vfeezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
+    #[link_name = "llvm.s390.vfeezhs"] fn vfeezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
+    #[link_name = "llvm.s390.vfeezfs"] fn vfeezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+
+    #[link_name = "llvm.s390.vfeneb"] fn vfeneb(a: i8x16, b: i8x16) -> i8x16;
+    #[link_name = "llvm.s390.vfeneh"] fn vfeneh(a: i16x8, b: i16x8) -> i16x8;
+    #[link_name = "llvm.s390.vfenef"] fn vfenef(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.s390.vfenezb"] fn vfenezb(a: i8x16, b: i8x16) -> i8x16;
+    #[link_name = "llvm.s390.vfenezh"] fn vfenezh(a: i16x8, b: i16x8) -> i16x8;
+    #[link_name = "llvm.s390.vfenezf"] fn vfenezf(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.s390.vfenebs"] fn vfenebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
+    #[link_name = "llvm.s390.vfenehs"] fn vfenehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
+    #[link_name = "llvm.s390.vfenefs"] fn vfenefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+
+    #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
+    #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
+    #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
 }
 
 impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -3437,6 +3469,107 @@ mod sealed {
         vector_unsigned_long_long
         vector_double
     }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorEqualityIdx: Sized {
+        type Result;
+
+        unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result;
+        unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result;
+
+        unsafe fn vec_cmpeq_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+        unsafe fn vec_cmpne_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+
+        unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result;
+        unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result;
+
+        unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+        unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+    }
+
+    macro_rules! impl_compare_equality_idx {
+        ($($ty:ident $ret:ident
+                $cmpeq:ident $cmpne:ident
+                $cmpeq_or_0:ident $cmpne_or_0:ident
+                $cmpeq_cc:ident $cmpne_cc:ident
+                $cmpeq_or_0_cc:ident $cmpne_or_0_cc:ident
+        )*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorEqualityIdx for $ty {
+                    type Result = $ret;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                        let PackedTuple { x: index, y: code } = $cmpeq_cc(transmute(self), transmute(other));
+                        cc.write(code); // second tuple field goes back through the caller's pointer
+                        transmute(index)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                        let PackedTuple { x: index, y: code } = $cmpne_cc(transmute(self), transmute(other));
+                        cc.write(code); // second tuple field goes back through the caller's pointer
+                        transmute(index)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                        let PackedTuple { x: index, y: code } = $cmpeq_or_0_cc(transmute(self), transmute(other));
+                        cc.write(code); // second tuple field goes back through the caller's pointer
+                        transmute(index)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                        let PackedTuple { x: index, y: code } = $cmpne_or_0_cc(transmute(self), transmute(other));
+                        cc.write(code); // second tuple field goes back through the caller's pointer
+                        transmute(index)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_compare_equality_idx! {
+        vector_signed_char vector_signed_char               vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_bool_char vector_unsigned_char               vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_unsigned_char vector_unsigned_char           vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_signed_short vector_signed_short             vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_bool_short  vector_unsigned_short            vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_unsigned_short vector_unsigned_short         vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_signed_int vector_signed_int                 vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_bool_int  vector_unsigned_int                vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_unsigned_int vector_unsigned_int             vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+    }
 }
 
 /// Load Count to Block Boundary
@@ -5030,6 +5163,79 @@ pub unsafe fn vec_cmple<T: sealed::VectorComparePredicate>(a: T, b: T) -> T::Res
     a.vec_cmple(b)
 }
 
+/// Vector Compare Equal Index (dispatches to the `vfee{b,h,f}` intrinsic selected for `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpeq_idx<T: sealed::VectorEqualityIdx>(a: T, b: T) -> T::Result {
+    T::vec_cmpeq_idx(a, b)
+}
+/// Vector Compare Not Equal Index (dispatches to the `vfene{b,h,f}` intrinsic selected for `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpne_idx<T: sealed::VectorEqualityIdx>(a: T, b: T) -> T::Result {
+    T::vec_cmpne_idx(a, b)
+}
+/// Vector Compare Equal Index with Condition Code (the code is stored through `cc`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpeq_idx_cc<T: sealed::VectorEqualityIdx>(
+    a: T,
+    b: T,
+    cc: *mut i32,
+) -> T::Result {
+    T::vec_cmpeq_idx_cc(a, b, cc)
+}
+/// Vector Compare Not Equal Index with Condition Code (the code is stored through `cc`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpne_idx_cc<T: sealed::VectorEqualityIdx>(
+    a: T,
+    b: T,
+    cc: *mut i32,
+) -> T::Result {
+    T::vec_cmpne_idx_cc(a, b, cc)
+}
+/// Vector Compare Equal or Zero Index (dispatches to the `vfeez{b,h,f}` intrinsic for `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpeq_or_0_idx<T: sealed::VectorEqualityIdx>(a: T, b: T) -> T::Result {
+    T::vec_cmpeq_or_0_idx(a, b)
+}
+/// Vector Compare Not Equal or Zero Index (dispatches to the `vfenez{b,h,f}` intrinsic for `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpne_or_0_idx<T: sealed::VectorEqualityIdx>(a: T, b: T) -> T::Result {
+    T::vec_cmpne_or_0_idx(a, b)
+}
+/// Vector Compare Equal or Zero Index with Condition Code (the code is stored through `cc`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpeq_or_0_idx_cc<T: sealed::VectorEqualityIdx>(
+    a: T,
+    b: T,
+    cc: *mut i32,
+) -> T::Result {
+    T::vec_cmpeq_or_0_idx_cc(a, b, cc)
+}
+/// Vector Compare Not Equal or Zero Index with Condition Code (the code is stored through `cc`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cmpne_or_0_idx_cc<T: sealed::VectorEqualityIdx>(
+    a: T,
+    b: T,
+    cc: *mut i32,
+) -> T::Result {
+    T::vec_cmpne_or_0_idx_cc(a, b, cc)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 551ecd050157cb1f06d61bb153730934518754eb Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 16:24:16 +0100
Subject: [PATCH 51/57] add `vec_all_nan`, `vec_any_nan`, `vec_all_numeric` and
 `vec_any_numeric`

---
 crates/core_arch/src/s390x/vector.rs | 96 +++++++++++++++++++++++++---
 1 file changed, 88 insertions(+), 8 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 828b6a0423..1779cff3c4 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2862,7 +2862,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFpTestDataClass {
         type Result;
-        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result;
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self) -> (Self::Result, i32);
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -2871,10 +2871,9 @@ mod sealed {
 
         #[inline]
         #[target_feature(enable = "vector")]
-        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result {
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self) -> (Self::Result, i32) {
             let PackedTuple { x, y } = vftcisb(self, CLASS);
-            unsafe { ptr.write(y) };
-            x
+            (x, y)
         }
     }
 
@@ -2884,10 +2883,9 @@ mod sealed {
 
         #[inline]
         #[target_feature(enable = "vector")]
-        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self, ptr: *mut i32) -> Self::Result {
+        unsafe fn vec_fp_test_data_class<const CLASS: u32>(self) -> (Self::Result, i32) {
             let PackedTuple { x, y } = vftcidb(self, CLASS);
-            unsafe { ptr.write(y) };
-            x
+            (x, y)
         }
     }
 
@@ -4800,7 +4798,37 @@ pub unsafe fn vec_fp_test_data_class<T: sealed::VectorFpTestDataClass, const CLA
     a: T,
     c: *mut i32,
 ) -> T::Result {
-    a.vec_fp_test_data_class::<CLASS>(c)
+    let (x, y) = a.vec_fp_test_data_class::<CLASS>();
+    c.write(y);
+    x
+}
+/// All Elements NaN: returns 1 when every element of `a` is a NaN, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_nan<T: sealed::VectorFpTestDataClass>(a: T) -> i32 {
+    i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 0)
+}
+/// All Elements Numeric: returns 1 when no element of `a` is a NaN, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_numeric<T: sealed::VectorFpTestDataClass>(a: T) -> i32 {
+    i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 3)
+}
+/// Any Element NaN: returns 1 when at least one element of `a` is a NaN, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_nan<T: sealed::VectorFpTestDataClass>(a: T) -> i32 {
+    i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 3)
+}
+/// Any Element Numeric: returns 1 when at least one element of `a` is not a NaN, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_numeric<T: sealed::VectorFpTestDataClass>(a: T) -> i32 {
+    i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 0)
 }
 
 /// Vector Test under Mask
@@ -6257,6 +6285,58 @@ mod tests {
         }
     }
 
+    #[simd_test(enable = "vector")]
+    fn test_vec_fp_any_all_nan_numeric() {
+        unsafe {
+            assert_eq!(
+                vec_all_nan(vector_double([f64::NAN, f64::NAN])),
+                i32::from(true)
+            );
+            assert_eq!(
+                vec_all_nan(vector_double([f64::NAN, 1.0])),
+                i32::from(false)
+            );
+            assert_eq!(vec_all_nan(vector_double([0.0, 1.0])), i32::from(false));
+
+            assert_eq!(
+                vec_any_nan(vector_double([f64::NAN, f64::NAN])),
+                i32::from(true)
+            );
+            assert_eq!(vec_any_nan(vector_double([f64::NAN, 1.0])), i32::from(true));
+            assert_eq!(vec_any_nan(vector_double([0.0, 1.0])), i32::from(false));
+
+            assert_eq!(
+                vec_all_numeric(vector_double([f64::NAN, f64::NAN])),
+                i32::from(false)
+            );
+            assert_eq!(
+                vec_all_numeric(vector_double([f64::NAN, 1.0])),
+                i32::from(false)
+            );
+            assert_eq!(vec_all_numeric(vector_double([0.0, 1.0])), i32::from(true));
+
+            assert_eq!(
+                vec_any_numeric(vector_double([f64::NAN, f64::NAN])),
+                i32::from(false)
+            );
+            assert_eq!(
+                vec_any_numeric(vector_double([f64::NAN, 1.0])),
+                i32::from(true)
+            );
+            assert_eq!(vec_any_numeric(vector_double([0.0, 1.0])), i32::from(true));
+
+            // "numeric" means "not NaN". infinities are numeric
+            assert_eq!(
+                vec_all_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])),
+                i32::from(true)
+            );
+            assert_eq!(
+                vec_any_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])),
+                i32::from(true)
+            );
+        }
+    }
+
     #[simd_test(enable = "vector")]
     fn test_vec_test_mask() {
         unsafe {

From db1d9ca4f7aa9f5913d1b592a61055fd0c55ef53 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 19:10:40 +0100
Subject: [PATCH 52/57] add `vec_any_*` and `vec_all_*`

---
 crates/core_arch/src/s390x/vector.rs | 219 +++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 1779cff3c4..b9bff813dc 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -2889,6 +2889,65 @@ mod sealed {
         }
     }
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorCompare {
+        unsafe fn vec_all_lt(self, other: Self) -> i32;
+        unsafe fn vec_all_le(self, other: Self) -> i32;
+        unsafe fn vec_all_gt(self, other: Self) -> i32;
+        unsafe fn vec_all_ge(self, other: Self) -> i32;
+    }
+
+    // Each `vec_all_*` predicate below is a lane-wise compare reduced with `simd_reduce_all`.
+    // NOTE: this is currently non-optimal, but it does work for floats even with only `vector`
+    // enabled. See:
+    // - https://github.com/llvm/llvm-project/issues/129434
+    // - https://github.com/llvm/llvm-project/issues/130424
+    macro_rules! impl_vec_compare {
+        ($($ty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorCompare for $ty {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_all_lt(self, other: Self) -> i32 {
+                        i32::from(simd_reduce_all(simd_lt::<_, t_b!($ty)>(self, other)))
+                    }
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_all_le(self, other: Self) -> i32 {
+                        i32::from(simd_reduce_all(simd_le::<_, t_b!($ty)>(self, other)))
+                    }
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_all_gt(self, other: Self) -> i32 {
+                        i32::from(simd_reduce_all(simd_gt::<_, t_b!($ty)>(self, other)))
+                    }
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_all_ge(self, other: Self) -> i32 {
+                        i32::from(simd_reduce_all(simd_ge::<_, t_b!($ty)>(self, other)))
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_vec_compare! {
+        vector_signed_char
+        vector_unsigned_char
+
+        vector_signed_short
+        vector_unsigned_short
+
+        vector_signed_int
+        vector_unsigned_int
+        vector_float
+
+        vector_signed_long_long
+        vector_unsigned_long_long
+        vector_double
+    }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorTestMask {
         type Mask;
@@ -5264,6 +5323,166 @@ pub unsafe fn vec_cmpne_or_0_idx_cc<T: sealed::VectorEqualityIdx>(
     a.vec_cmpne_or_0_idx_cc(b, cc)
 }
 
+/// All Elements Equal: 1 when `simd_reduce_all` over `vec_cmpeq(a, b)` is true, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_eq<T: sealed::VectorEquality>(a: T, b: T) -> i32 {
+    simd_reduce_all(vec_cmpeq(a, b)) as i32
+}
+
+/// All Elements Not Equal: 1 when `simd_reduce_all` over `vec_cmpne(a, b)` is true, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_ne<T: sealed::VectorEquality>(a: T, b: T) -> i32 {
+    i32::from(simd_reduce_all(vec_cmpne(a, b)))
+}
+
+/// Any Element Equal: 1 when `simd_reduce_any` over `vec_cmpeq(a, b)` is true, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_eq<T: sealed::VectorEquality>(a: T, b: T) -> i32 {
+    i32::from(simd_reduce_any(vec_cmpeq(a, b)))
+}
+
+/// Any Element Not Equal: 1 when `simd_reduce_any` over `vec_cmpne(a, b)` is true, otherwise 0
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_ne<T: sealed::VectorEquality>(a: T, b: T) -> i32 {
+    i32::from(simd_reduce_any(vec_cmpne(a, b)))
+}
+
+/// All Elements Less Than (forwards to the `VectorCompare::vec_all_lt` implementation of `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_lt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    T::vec_all_lt(a, b)
+}
+
+/// All Elements Less Than or Equal (forwards to `VectorCompare::vec_all_le` of `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_le<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    T::vec_all_le(a, b)
+}
+
+/// All Elements Greater Than (forwards to the `VectorCompare::vec_all_gt` implementation of `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_gt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    T::vec_all_gt(a, b)
+}
+
+/// All Elements Greater Than or Equal (forwards to `VectorCompare::vec_all_ge` of `T`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_ge<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    T::vec_all_ge(a, b)
+}
+
+/// All Elements Not Less Than (evaluated as "all elements greater than or equal")
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_nlt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    a.vec_all_ge(b)
+}
+
+/// All Elements Not Less Than or Equal (evaluated as "all elements greater than")
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_nle<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    a.vec_all_gt(b)
+}
+
+/// All Elements Not Greater Than (evaluated as "all elements less than or equal")
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_ngt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    a.vec_all_le(b)
+}
+
+/// All Elements Not Greater Than or Equal (evaluated as "all elements less than")
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_all_nge<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    a.vec_all_lt(b)
+}
+
+/// Any Elements Less Than: returns 0 when `vec_all_ge(a, b)` holds, otherwise 1
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_lt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    i32::from(vec_all_ge(a, b) == 0) // `!` on an i32 is bitwise NOT and would never yield 0
+}
+
+/// Any Elements Less Than or Equal: returns 0 when `vec_all_gt(a, b)` holds, otherwise 1
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_le<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    i32::from(vec_all_gt(a, b) == 0) // `!` on an i32 is bitwise NOT and would never yield 0
+}
+
+/// Any Elements Greater Than: returns 0 when `vec_all_le(a, b)` holds, otherwise 1
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_gt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    i32::from(vec_all_le(a, b) == 0) // `!` on an i32 is bitwise NOT and would never yield 0
+}
+
+/// Any Elements Greater Than or Equal: returns 0 when `vec_all_lt(a, b)` holds, otherwise 1
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_ge<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    i32::from(vec_all_lt(a, b) == 0) // `!` on an i32 is bitwise NOT and would never yield 0
+}
+
+/// Any Elements Not Less Than (forwards to `vec_any_ge`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_nlt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    vec_any_ge(a, b)
+}
+
+/// Any Elements Not Less Than or Equal (forwards to `vec_any_gt`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_nle<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    vec_any_gt(a, b)
+}
+
+/// Any Elements Not Greater Than (forwards to `vec_any_le`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_ngt<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    vec_any_le(a, b)
+}
+
+/// Any Elements Not Greater Than or Equal (forwards to `vec_any_lt`)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_any_nge<T: sealed::VectorCompare>(a: T, b: T) -> i32 {
+    vec_any_lt(a, b)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 11d56df7006c9c5e03e5c3b109689bf9b2428255 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 19:37:14 +0100
Subject: [PATCH 53/57] add `vec_mulo`

---
 crates/core_arch/src/s390x/vector.rs | 49 ++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index b9bff813dc..b654d265cc 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -201,6 +201,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
 
+    #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
+    #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
+
+    #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
+
     #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
@@ -2620,6 +2628,27 @@ mod sealed {
     impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
     impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMulo<Result> {
+        unsafe fn vec_mulo(self, b: Self) -> Result;
+    }
+
+    test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] }
+    test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] }
+    test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] }
+
+    test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] }
+    test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] }
+    test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] }
+
+    impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short );
+    impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int);
+    impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
+
+    impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
+    impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
+    impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorGfmsum<Result> {
         unsafe fn vec_gfmsum(self, b: Self) -> Result;
@@ -4728,6 +4757,14 @@ pub unsafe fn vec_mule<T: sealed::VectorMule<U>, U>(a: T, b: T) -> U {
     a.vec_mule(b)
 }
 
+/// Vector Multiply Odd: widening product of the odd-indexed elements of `a` and `b`
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mulo<T: sealed::VectorMulo<U>, U>(a: T, b: T) -> U {
+    T::vec_mulo(a, b)
+}
+
 /// Vector Galois Field Multiply Sum
 #[inline]
 #[target_feature(enable = "vector")]
@@ -6394,6 +6431,18 @@ mod tests {
         [0x4000_0000, -8, 0xFFFE, 2]
     }
 
+    test_vec_2! { test_vec_mulo_u, vec_mulo, u16x8, u16x8 -> u32x4,
+        [0, 0xFFFF, 0, 2, 0, 2, 0, 1],
+        [0, 0xFFFF, 0, 4, 0, 0xFFFF, 0, 2],
+        [0xFFFE_0001, 8, 0x0001_FFFE, 2]
+    }
+
+    test_vec_2! { test_vec_mulo_i, vec_mulo, i16x8, i16x8 -> i32x4,
+        [0, i16::MIN, 0, -2, 0, 2, 0, 1],
+        [0, i16::MIN, 0, 4, 0, i16::MAX, 0, 2],
+        [0x4000_0000, -8, 0xFFFE, 2]
+    }
+
     test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4,
         [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0],
         [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678],

From 1070ae81e271db3b5b19c6902542e360681b3e42 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 19:47:36 +0100
Subject: [PATCH 54/57] add `vec_mulh`

---
 crates/core_arch/src/s390x/vector.rs | 49 ++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index b654d265cc..45f75dfc46 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -209,6 +209,14 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
 
+    #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
+    #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
+
+    #[link_name = "llvm.s390.vmlhb"] fn vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vmlhh"] fn vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmlhf"] fn vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
+
     #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
@@ -2649,6 +2657,27 @@ mod sealed {
     impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
     impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMulh<Result> {
+        unsafe fn vec_mulh(self, b: Self) -> Result;
+    }
+
+    test_impl! { vec_vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vmhb, vmhb ] }
+    test_impl! { vec_vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vmhh, vmhh ] }
+    test_impl! { vec_vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vmhf, vmhf ] }
+
+    test_impl! { vec_vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vmlhb, vmlhb ] }
+    test_impl! { vec_vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vmlhh, vmlhh ] }
+    test_impl! { vec_vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vmlhf, vmlhf ] }
+
+    impl_mul!([VectorMulh vec_mulh] vec_vmhb (vector_signed_char, vector_signed_char) -> vector_signed_char);
+    impl_mul!([VectorMulh vec_mulh] vec_vmhh (vector_signed_short, vector_signed_short) -> vector_signed_short);
+    impl_mul!([VectorMulh vec_mulh] vec_vmhf (vector_signed_int, vector_signed_int) -> vector_signed_int);
+
+    impl_mul!([VectorMulh vec_mulh] vec_vmlhb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char);
+    impl_mul!([VectorMulh vec_mulh] vec_vmlhh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short);
+    impl_mul!([VectorMulh vec_mulh] vec_vmlhf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int);
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorGfmsum<Result> {
         unsafe fn vec_gfmsum(self, b: Self) -> Result;
@@ -4765,6 +4794,14 @@ pub unsafe fn vec_mulo<T: sealed::VectorMulo<U>, U>(a: T, b: T) -> U {
     a.vec_mulo(b)
 }
 
+/// Vector Multiply High: high half of each element-wise product of `a` and `b`
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mulh<T: sealed::VectorMulh<U>, U>(a: T, b: T) -> U {
+    T::vec_mulh(a, b)
+}
+
 /// Vector Galois Field Multiply Sum
 #[inline]
 #[target_feature(enable = "vector")]
@@ -6443,6 +6480,18 @@ mod tests {
         [0x4000_0000, -8, 0xFFFE, 2]
     }
 
+    test_vec_2! { test_vec_mulh_u, vec_mulh, u32x4, u32x4 -> u32x4,
+        [u32::MAX, 2, 2, 1],
+        [u32::MAX, 4, u32::MAX, 2],
+        [u32::MAX - 1, 0, 1, 0]
+    }
+
+    test_vec_2! { test_vec_mulh_i, vec_mulh, i32x4, i32x4 -> i32x4,
+        [i32::MIN, -2, 2, 1],
+        [i32::MIN, 4, i32::MAX, 2],
+        [0x4000_0000, -1, 0, 0]
+    }
+
     test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4,
         [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0],
         [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678],

From 052dd81dca097965f8324cd8ccb2dc2f9750559b Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 8 Mar 2025 20:56:43 +0100
Subject: [PATCH 55/57] add `vec_meadd`, `vec_moadd`, `vec_mhadd` and
 `vec_mladd`

---
 crates/core_arch/src/s390x/vector.rs | 237 +++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 45f75dfc46..13f410f534 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -217,6 +217,38 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vmlhh"] fn vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vmlhf"] fn vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
 
+    #[link_name = "llvm.s390.vmaeb"] fn vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmaeh"] fn vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int;
+    #[link_name = "llvm.s390.vmaef"] fn vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long;
+
+    #[link_name = "llvm.s390.vmaleb"] fn vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmaleh"] fn vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vmalef"] fn vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vmaob"] fn vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmaoh"] fn vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int;
+    #[link_name = "llvm.s390.vmaof"] fn vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long;
+
+    #[link_name = "llvm.s390.vmalob"] fn vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmaloh"] fn vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int;
+    #[link_name = "llvm.s390.vmalof"] fn vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long;
+
+    #[link_name = "llvm.s390.vmahb"] fn vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char;
+    #[link_name = "llvm.s390.vmahh"] fn vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmahf"] fn vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int;
+
+    #[link_name = "llvm.s390.vmalhb"] fn vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vmalhh"] fn vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmalhf"] fn vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+
+    #[link_name = "llvm.s390.vmalb"] fn vmalb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char;
+    #[link_name = "llvm.s390.vmalh"] fn vmalh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short;
+    #[link_name = "llvm.s390.vmalf"] fn vmalf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int;
+
+    #[link_name = "llvm.s390.vmallb"] fn vmallb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char;
+    #[link_name = "llvm.s390.vmallh"] fn vmallh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short;
+    #[link_name = "llvm.s390.vmallf"] fn vmallf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+
     #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
     #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
     #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
@@ -2594,6 +2626,17 @@ mod sealed {
                 }
             }
         };
+        ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty, $c:ty) -> $r:ty) => {
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            impl $Trait for $a {
+                type Result = $r;
+                #[inline]
+                #[target_feature(enable = "vector")]
+                unsafe fn $m(self, b: $b, c: $c) -> $r {
+                    $fun(self, b, c)
+                }
+            }
+        };
     }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -2678,6 +2721,100 @@ mod sealed {
     impl_mul!([VectorMulh vec_mulh] vec_vmlhh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short);
     impl_mul!([VectorMulh vec_mulh] vec_vmlhf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int);
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMeadd {
+        type Result;
+        unsafe fn vec_meadd(self, b: Self, c: Self::Result) -> Self::Result;
+    }
+
+    test_impl! { vec_vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaeb, vmaeb ] }
+    test_impl! { vec_vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaeh, vmaeh ] }
+    test_impl! { vec_vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaef, vmaef ] }
+
+    test_impl! { vec_vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmaleb, vmaleb ] }
+    test_impl! { vec_vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaleh, vmaleh ] }
+    test_impl! { vec_vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalef, vmalef ] }
+
+    impl_mul!([VectorMeadd vec_meadd] vec_vmaeb (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short );
+    impl_mul!([VectorMeadd vec_meadd] vec_vmaeh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int);
+    impl_mul!([VectorMeadd vec_meadd] vec_vmaef (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long );
+
+    impl_mul!([VectorMeadd vec_meadd] vec_vmaleb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short );
+    impl_mul!([VectorMeadd vec_meadd] vec_vmaleh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int);
+    impl_mul!([VectorMeadd vec_meadd] vec_vmalef (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long );
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMoadd {
+        type Result;
+        unsafe fn vec_moadd(self, b: Self, c: Self::Result) -> Self::Result;
+    }
+
+    test_impl! { vec_vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaob, vmaob ] }
+    test_impl! { vec_vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaoh, vmaoh ] }
+    test_impl! { vec_vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaof, vmaof ] }
+
+    test_impl! { vec_vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmalob, vmalob ] }
+    test_impl! { vec_vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaloh, vmaloh ] }
+    test_impl! { vec_vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalof, vmalof ] }
+
+    impl_mul!([VectorMoadd vec_moadd] vec_vmaob (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short );
+    impl_mul!([VectorMoadd vec_moadd] vec_vmaoh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int);
+    impl_mul!([VectorMoadd vec_moadd] vec_vmaof (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long );
+
+    impl_mul!([VectorMoadd vec_moadd] vec_vmalob (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short );
+    impl_mul!([VectorMoadd vec_moadd] vec_vmaloh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int);
+    impl_mul!([VectorMoadd vec_moadd] vec_vmalof (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long );
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMhadd {
+        type Result;
+        unsafe fn vec_mhadd(self, b: Self, c: Self::Result) -> Self::Result;
+    }
+
+    test_impl! { vec_vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [ vmahb, vmahb ] }
+    test_impl! { vec_vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[ vmahh, vmahh ] }
+    test_impl! { vec_vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [ vmahf, vmahf ] }
+
+    test_impl! { vec_vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [ vmalhb, vmalhb ] }
+    test_impl! { vec_vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[ vmalhh, vmalhh ] }
+    test_impl! { vec_vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [ vmalhf, vmalhf ] }
+
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmahb (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char );
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmahh (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short);
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmahf (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int );
+
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmalhb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char );
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmalhh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short);
+    impl_mul!([VectorMhadd vec_mhadd] vec_vmalhf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int );
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMladd {
+        type Result;
+        unsafe fn vec_mladd(self, b: Self, c: Self::Result) -> Self::Result;
+    }
+
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn simd_mladd<T>(a: T, b: T, c: T) -> T {
+        simd_add(simd_mul(a, b), c)
+    }
+
+    test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] }
+    test_impl! { vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] }
+    test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] }
+
+    test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] }
+    test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] }
+    test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] }
+
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char );
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_ih (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short);
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_if (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int );
+
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_ub (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char );
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_uh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short);
+    impl_mul!([VectorMladd vec_mladd] vec_vmal_uf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int );
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorGfmsum<Result> {
         unsafe fn vec_gfmsum(self, b: Self) -> Result;
@@ -4769,6 +4906,38 @@ pub unsafe fn vec_msub<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
     a.vec_msub(b, c)
 }
 
+/// Vector Multiply and Add Even: widening product of the even-indexed lanes of `a` and `b`, plus `c`.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_meadd<T: sealed::VectorMeadd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_meadd(b, c)
+}
+
+/// Vector Multiply and Add Odd: widening product of the odd-indexed lanes of `a` and `b`, plus `c`.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_moadd<T: sealed::VectorMoadd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_moadd(b, c)
+}
+
+/// Vector Multiply and Add High: the high half of each lane's double-width `a * b + c`.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mhadd<T: sealed::VectorMhadd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_mhadd(b, c)
+}
+
+/// Vector Multiply and Add Low: lane-wise `a * b + c` at the element width of the inputs.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mladd<T: sealed::VectorMladd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_mladd(b, c)
+}
+
 /// Vector Checksum
 #[inline]
 #[target_feature(enable = "vector")]
@@ -7032,4 +7201,72 @@ mod tests {
         [1.0, f32::NAN, 5.0, 3.14],
         [0, !0, !0, !0]
     }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_meadd() {
+        let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]); // only even lanes are non-zero
+        let b = vector_unsigned_short([5, 0, 6, 0, 7, 0, 8, 0]);
+        let c = vector_unsigned_int([2, 2, 2, 2]);
+
+        let d = unsafe { vec_meadd(a, b, c) };
+        assert_eq!(d.as_array(), &[7, 14, 23, 34]); // a[2i] * b[2i] + c[i]
+
+        let a = vector_signed_short([1, 0, 2, 0, 3, 0, 4, 0]);
+        let b = vector_signed_short([5, 0, 6, 0, 7, 0, 8, 0]);
+        let c = vector_signed_int([2, -2, 2, -2]);
+
+        let d = unsafe { vec_meadd(a, b, c) };
+        assert_eq!(d.as_array(), &[7, 10, 23, 30]); // 5 + 2, 12 - 2, 21 + 2, 32 - 2
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_moadd() {
+        let a = vector_unsigned_short([0, 1, 0, 2, 0, 3, 0, 4]); // only odd lanes are non-zero
+        let b = vector_unsigned_short([0, 5, 0, 6, 0, 7, 0, 8]);
+        let c = vector_unsigned_int([2, 2, 2, 2]);
+
+        let d = unsafe { vec_moadd(a, b, c) };
+        assert_eq!(d.as_array(), &[7, 14, 23, 34]); // a[2i + 1] * b[2i + 1] + c[i]
+
+        let a = vector_signed_short([0, 1, 0, 2, 0, 3, 0, 4]);
+        let b = vector_signed_short([0, 5, 0, 6, 0, 7, 0, 8]);
+        let c = vector_signed_int([2, -2, 2, -2]);
+
+        let d = unsafe { vec_moadd(a, b, c) };
+        assert_eq!(d.as_array(), &[7, 10, 23, 30]); // 5 + 2, 12 - 2, 21 + 2, 32 - 2
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_mhadd() {
+        let a = vector_unsigned_int([1, 2, 3, 4]);
+        let b = vector_unsigned_int([5, 6, 7, 8]);
+        let c = vector_unsigned_int([u32::MAX; 4]);
+
+        let d = unsafe { vec_mhadd(a, b, c) };
+        assert_eq!(d.as_array(), &[1, 1, 1, 1]); // a * b + u32::MAX carries once into the high word
+
+        let a = vector_signed_int([-1, -2, -3, -4]);
+        let b = vector_signed_int([5, 6, 7, 8]);
+        let c = vector_signed_int([i32::MIN; 4]);
+
+        let d = unsafe { vec_mhadd(a, b, c) };
+        assert_eq!(d.as_array(), &[-1, -1, -1, -1]); // sum drops below i32::MIN, so the high word is -1
+    }
+
+    #[simd_test(enable = "vector")]
+    fn test_vec_mladd() {
+        let a = vector_unsigned_int([1, 2, 3, 4]);
+        let b = vector_unsigned_int([5, 6, 7, 8]);
+        let c = vector_unsigned_int([2, 2, 2, 2]);
+
+        let d = unsafe { vec_mladd(a, b, c) };
+        assert_eq!(d.as_array(), &[7, 14, 23, 34]); // a[i] * b[i] + c[i]
+
+        let a = vector_signed_int([-1, -2, -3, -4]);
+        let b = vector_signed_int([5, 6, 7, 8]);
+        let c = vector_signed_int([2, 2, 2, 2]);
+
+        let d = unsafe { vec_mladd(a, b, c) };
+        assert_eq!(d.as_array(), &[-3, -10, -19, -30]); // -5 + 2, -12 + 2, -21 + 2, -32 + 2
+    }
 }

From 049b750fc4ad5a0f72a6c40f1c5e2af5ab1754a7 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 16 Mar 2025 16:38:44 +0100
Subject: [PATCH 56/57] shrink the size of type signatures

---
 crates/core_arch/src/s390x/vector.rs | 220 ++++++---------------------
 1 file changed, 44 insertions(+), 176 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 13f410f534..a4df04ed0e 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -3837,10 +3837,7 @@ unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_add<T, U>(a: T, b: U) -> <T as sealed::VectorAdd<U>>::Result
-where
-    T: sealed::VectorAdd<U>,
-{
+pub unsafe fn vec_add<T: sealed::VectorAdd<U>, U>(a: T, b: U) -> T::Result {
     a.vec_add(b)
 }
 
@@ -3848,10 +3845,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_sub<T, U>(a: T, b: U) -> <T as sealed::VectorSub<U>>::Result
-where
-    T: sealed::VectorSub<U>,
-{
+pub unsafe fn vec_sub<T: sealed::VectorSub<U>, U>(a: T, b: U) -> T::Result {
     a.vec_sub(b)
 }
 
@@ -3865,10 +3859,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_mul<T>(a: T, b: T) -> T
-where
-    T: sealed::VectorMul,
-{
+pub unsafe fn vec_mul<T: sealed::VectorMul>(a: T, b: T) -> T {
     a.vec_mul(b)
 }
 
@@ -3876,10 +3867,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cntlz<T>(a: T) -> <T as sealed::CountBits>::Result
-where
-    T: sealed::CountBits,
-{
+pub unsafe fn vec_cntlz<T: sealed::CountBits>(a: T) -> T::Result {
     a.vec_cntlz()
 }
 
@@ -3887,10 +3875,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_cnttz<T>(a: T) -> <T as sealed::CountBits>::Result
-where
-    T: sealed::CountBits,
-{
+pub unsafe fn vec_cnttz<T: sealed::CountBits>(a: T) -> T::Result {
     a.vec_cnttz()
 }
 
@@ -3900,10 +3885,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_popcnt<T>(a: T) -> <T as sealed::CountBits>::Result
-where
-    T: sealed::CountBits,
-{
+pub unsafe fn vec_popcnt<T: sealed::CountBits>(a: T) -> T::Result {
     a.vec_popcnt()
 }
 
@@ -3911,10 +3893,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_max<T, U>(a: T, b: U) -> <T as sealed::VectorMax<U>>::Result
-where
-    T: sealed::VectorMax<U>,
-{
+pub unsafe fn vec_max<T: sealed::VectorMax<U>, U>(a: T, b: U) -> T::Result {
     a.vec_max(b)
 }
 
@@ -3922,10 +3901,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_min<T, U>(a: T, b: U) -> <T as sealed::VectorMin<U>>::Result
-where
-    T: sealed::VectorMin<U>,
-{
+pub unsafe fn vec_min<T: sealed::VectorMin<U>, U>(a: T, b: U) -> T::Result {
     a.vec_min(b)
 }
 
@@ -3933,10 +3909,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_abs<T>(a: T) -> T
-where
-    T: sealed::VectorAbs,
-{
+pub unsafe fn vec_abs<T: sealed::VectorAbs>(a: T) -> T {
     a.vec_abs()
 }
 
@@ -3968,10 +3941,7 @@ pub unsafe fn vec_nmsub<T: sealed::VectorNmsub>(a: T, b: T, c: T) -> T {
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_sqrt<T>(a: T) -> T
-where
-    T: sealed::VectorSqrt,
-{
+pub unsafe fn vec_sqrt<T: sealed::VectorSqrt>(a: T) -> T {
     a.vec_sqrt()
 }
 
@@ -3979,10 +3949,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_splat<T, const IMM: u32>(a: T) -> T
-where
-    T: sealed::VectorSplat,
-{
+pub unsafe fn vec_splat<T: sealed::VectorSplat, const IMM: u32>(a: T) -> T {
     a.vec_splat::<IMM>()
 }
 
@@ -3990,10 +3957,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_splats<T, U>(a: T) -> U
-where
-    T: sealed::VectorSplats<U>,
-{
+pub unsafe fn vec_splats<T: sealed::VectorSplats<U>, U>(a: T) -> U {
     a.vec_splats()
 }
 
@@ -4001,10 +3965,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_and<T, U>(a: T, b: U) -> <T as sealed::VectorAnd<U>>::Result
-where
-    T: sealed::VectorAnd<U>,
-{
+pub unsafe fn vec_and<T: sealed::VectorAnd<U>, U>(a: T, b: U) -> T::Result {
     a.vec_and(b)
 }
 
@@ -4012,10 +3973,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_or<T, U>(a: T, b: U) -> <T as sealed::VectorOr<U>>::Result
-where
-    T: sealed::VectorOr<U>,
-{
+pub unsafe fn vec_or<T: sealed::VectorOr<U>, U>(a: T, b: U) -> T::Result {
     a.vec_or(b)
 }
 
@@ -4023,10 +3981,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_xor<T, U>(a: T, b: U) -> <T as sealed::VectorXor<U>>::Result
-where
-    T: sealed::VectorXor<U>,
-{
+pub unsafe fn vec_xor<T: sealed::VectorXor<U>, U>(a: T, b: U) -> T::Result {
     a.vec_xor(b)
 }
 
@@ -4034,10 +3989,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_nor<T, U>(a: T, b: U) -> <T as sealed::VectorNor<U>>::Result
-where
-    T: sealed::VectorNor<U>,
-{
+pub unsafe fn vec_nor<T: sealed::VectorNor<U>, U>(a: T, b: U) -> T::Result {
     a.vec_nor(b)
 }
 
@@ -4045,10 +3997,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_nand<T, U>(a: T, b: U) -> <T as sealed::VectorNand<U>>::Result
-where
-    T: sealed::VectorNand<U>,
-{
+pub unsafe fn vec_nand<T: sealed::VectorNand<U>, U>(a: T, b: U) -> T::Result {
     a.vec_nand(b)
 }
 
@@ -4056,10 +4005,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_eqv<T, U>(a: T, b: U) -> <T as sealed::VectorEqv<U>>::Result
-where
-    T: sealed::VectorEqv<U>,
-{
+pub unsafe fn vec_eqv<T: sealed::VectorEqv<U>, U>(a: T, b: U) -> T::Result {
     a.vec_eqv(b)
 }
 
@@ -4067,10 +4013,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_andc<T, U>(a: T, b: U) -> <T as sealed::VectorAndc<U>>::Result
-where
-    T: sealed::VectorAndc<U>,
-{
+pub unsafe fn vec_andc<T: sealed::VectorAndc<U>, U>(a: T, b: U) -> T::Result {
     a.vec_andc(b)
 }
 
@@ -4084,10 +4027,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_orc<T, U>(a: T, b: U) -> <T as sealed::VectorOrc<U>>::Result
-where
-    T: sealed::VectorOrc<U>,
-{
+pub unsafe fn vec_orc<T: sealed::VectorOrc<U>, U>(a: T, b: U) -> T::Result {
     a.vec_orc(b)
 }
 
@@ -4095,10 +4035,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_floor<T>(a: T) -> T
-where
-    T: sealed::VectorFloor,
-{
+pub unsafe fn vec_floor<T: sealed::VectorFloor>(a: T) -> T {
     a.vec_floor()
 }
 
@@ -4106,10 +4043,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_ceil<T>(a: T) -> T
-where
-    T: sealed::VectorCeil,
-{
+pub unsafe fn vec_ceil<T: sealed::VectorCeil>(a: T) -> T {
     a.vec_ceil()
 }
 
@@ -4118,10 +4052,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_trunc<T>(a: T) -> T
-where
-    T: sealed::VectorTrunc,
-{
+pub unsafe fn vec_trunc<T: sealed::VectorTrunc>(a: T) -> T {
     a.vec_trunc()
 }
 
@@ -4130,10 +4061,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_round<T>(a: T) -> T
-where
-    T: sealed::VectorRound,
-{
+pub unsafe fn vec_round<T: sealed::VectorRound>(a: T) -> T {
     a.vec_round()
 }
 
@@ -4142,10 +4070,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_roundc<T>(a: T) -> T
-where
-    T: sealed::VectorRoundc,
-{
+pub unsafe fn vec_roundc<T: sealed::VectorRoundc>(a: T) -> T {
     a.vec_roundc()
 }
 
@@ -4154,10 +4079,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_roundm<T>(a: T) -> T
-where
-    T: sealed::VectorFloor,
-{
+pub unsafe fn vec_roundm<T: sealed::VectorFloor>(a: T) -> T {
     // the IBM docs note
     //
     // > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception.
@@ -4171,10 +4093,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_roundp<T>(a: T) -> T
-where
-    T: sealed::VectorCeil,
-{
+pub unsafe fn vec_roundp<T: sealed::VectorCeil>(a: T) -> T {
     // the IBM docs note
     //
     // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception.
@@ -4188,10 +4107,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_roundz<T>(a: T) -> T
-where
-    T: sealed::VectorTrunc,
-{
+pub unsafe fn vec_roundz<T: sealed::VectorTrunc>(a: T) -> T {
     // the IBM docs note
     //
     // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
@@ -4204,10 +4120,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_rint<T>(a: T) -> T
-where
-    T: sealed::VectorRint,
-{
+pub unsafe fn vec_rint<T: sealed::VectorRint>(a: T) -> T {
     a.vec_rint()
 }
 
@@ -4223,10 +4136,7 @@ pub unsafe fn vec_avg<T: sealed::VectorAvg<U>, U>(a: T, b: U) -> T::Result {
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_sl<T, U>(a: T, b: U) -> <T as sealed::VectorSl<U>>::Result
-where
-    T: sealed::VectorSl<U>,
-{
+pub unsafe fn vec_sl<T: sealed::VectorSl<U>, U>(a: T, b: U) -> T::Result {
     a.vec_sl(b)
 }
 
@@ -4234,10 +4144,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_sr<T, U>(a: T, b: U) -> <T as sealed::VectorSr<U>>::Result
-where
-    T: sealed::VectorSr<U>,
-{
+pub unsafe fn vec_sr<T: sealed::VectorSr<U>, U>(a: T, b: U) -> T::Result {
     a.vec_sr(b)
 }
 
@@ -4245,10 +4152,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_sra<T, U>(a: T, b: U) -> <T as sealed::VectorSra<U>>::Result
-where
-    T: sealed::VectorSra<U>,
-{
+pub unsafe fn vec_sra<T: sealed::VectorSra<U>, U>(a: T, b: U) -> T::Result {
     a.vec_sra(b)
 }
 
@@ -4256,10 +4160,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_slb<T, U>(a: T, b: U) -> <T as sealed::VectorSlb<U>>::Result
-where
-    T: sealed::VectorSlb<U>,
-{
+pub unsafe fn vec_slb<T: sealed::VectorSlb<U>, U>(a: T, b: U) -> T::Result {
     a.vec_slb(b)
 }
 
@@ -4267,10 +4168,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_srb<T, U>(a: T, b: U) -> <T as sealed::VectorSrb<U>>::Result
-where
-    T: sealed::VectorSrb<U>,
-{
+pub unsafe fn vec_srb<T: sealed::VectorSrb<U>, U>(a: T, b: U) -> T::Result {
     a.vec_srb(b)
 }
 
@@ -4278,10 +4176,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_srab<T, U>(a: T, b: U) -> <T as sealed::VectorSrab<U>>::Result
-where
-    T: sealed::VectorSrab<U>,
-{
+pub unsafe fn vec_srab<T: sealed::VectorSrab<U>, U>(a: T, b: U) -> T::Result {
     a.vec_srab(b)
 }
 
@@ -4289,10 +4184,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_rl<T, U>(a: T, b: U) -> <T as sealed::VectorRl<U>>::Result
-where
-    T: sealed::VectorRl<U>,
-{
+pub unsafe fn vec_rl<T: sealed::VectorRl<U>, U>(a: T, b: U) -> T::Result {
     a.vec_rl(b)
 }
 
@@ -4338,10 +4230,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_rli<T>(a: T, bits: core::ffi::c_ulong) -> T
-where
-    T: sealed::VectorRli,
-{
+pub unsafe fn vec_rli<T: sealed::VectorRli>(a: T, bits: core::ffi::c_ulong) -> T {
     a.vec_rli(bits)
 }
 
@@ -4349,10 +4238,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_reve<T>(a: T) -> T
-where
-    T: sealed::VectorReve,
-{
+pub unsafe fn vec_reve<T: sealed::VectorReve>(a: T) -> T {
     a.vec_reve()
 }
 
@@ -4360,10 +4246,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_revb<T>(a: T) -> T
-where
-    T: sealed::VectorRevb,
-{
+pub unsafe fn vec_revb<T: sealed::VectorRevb>(a: T) -> T {
     a.vec_revb()
 }
 
@@ -4371,10 +4254,7 @@ where
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_mergeh<T>(a: T, b: T) -> T
-where
-    T: sealed::VectorMergeh,
-{
+pub unsafe fn vec_mergeh<T: sealed::VectorMergeh>(a: T, b: T) -> T {
     a.vec_mergeh(b)
 }
 
@@ -4438,10 +4318,7 @@ pub unsafe fn vec_unpackl<T: sealed::VectorUnpackl>(a: T) -> <T as sealed::Vecto
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_mergel<T>(a: T, b: T) -> T
-where
-    T: sealed::VectorMergel,
-{
+pub unsafe fn vec_mergel<T: sealed::VectorMergel>(a: T, b: T) -> T {
     a.vec_mergel(b)
 }
 
@@ -4579,10 +4456,7 @@ pub unsafe fn vec_sub_u128(
 #[inline]
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-pub unsafe fn vec_subc<T, U>(a: T, b: U) -> <T as sealed::VectorSubc<U>>::Result
-where
-    T: sealed::VectorSubc<U>,
-{
+pub unsafe fn vec_subc<T: sealed::VectorSubc<U>, U>(a: T, b: U) -> T::Result {
     a.vec_subc(b)
 }
 
@@ -4771,10 +4645,7 @@ macro_rules! vec_find_any {
             #[inline]
             #[target_feature(enable = "vector")]
             #[unstable(feature = "stdarch_s390x", issue = "135681")]
-            pub unsafe fn $fun<T, U>(a: T, b: U) -> <T as sealed::$Trait<U>>::Result
-            where
-                T: sealed::$Trait<U>,
-            {
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U) -> T::Result {
                 a.$fun(b)
             }
         )*
@@ -4796,10 +4667,7 @@ macro_rules! vec_find_any_cc {
             #[inline]
             #[target_feature(enable = "vector")]
             #[unstable(feature = "stdarch_s390x", issue = "135681")]
-            pub unsafe fn $fun<T, U>(a: T, b: U, c: *mut i32) -> <T as sealed::$Trait<U>>::Result
-            where
-                T: sealed::$Trait<U>,
-            {
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U, c: *mut i32) -> T::Result {
                 a.$fun(b, c)
             }
         )*

From 5e2f119f26975fc41505168dfa813d73f950adf5 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 16 Mar 2025 17:02:35 +0100
Subject: [PATCH 57/57] move unsafe pointer writes to the surface

---
 crates/core_arch/src/s390x/vector.rs | 133 ++++++++++++++-------------
 1 file changed, 70 insertions(+), 63 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index a4df04ed0e..5103273ed9 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -1929,10 +1929,9 @@ mod sealed {
                     type Result = $r;
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result {
+                    unsafe fn $m(self, b: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b));
-                        c.write(y);
-                        transmute(x)
+                        (transmute(x), y)
                     }
                 }
             )*
@@ -1959,10 +1958,9 @@ mod sealed {
                     type Result = t_b!($ty);
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result {
+                    unsafe fn $m(self, b: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b));
-                        c.write(y);
-                        transmute(x)
+                        (transmute(x), y)
                     }
                 }
             )*
@@ -2126,7 +2124,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyEqCC<Other> {
         type Result;
-        unsafe fn vec_find_any_eq_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_eq_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [cc VectorFindAnyEqCC vec_find_any_eq_cc] Eq vfaebs vfaehs vfaefs }
@@ -2134,7 +2132,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyNeCC<Other> {
         type Result;
-        unsafe fn vec_find_any_ne_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_ne_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [cc VectorFindAnyNeCC vec_find_any_ne_cc] Ne vfaebs vfaehs vfaefs }
@@ -2142,7 +2140,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyEqIdxCC<Other> {
         type Result;
-        unsafe fn vec_find_any_eq_idx_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_eq_idx_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [idx_cc VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc] EqIdx vfaebs vfaehs vfaefs }
@@ -2150,7 +2148,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyNeIdxCC<Other> {
         type Result;
-        unsafe fn vec_find_any_ne_idx_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_ne_idx_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [idx_cc VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc] NeIdx vfaebs vfaehs vfaefs }
@@ -2158,7 +2156,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyEqOrZeroIdxCC<Other> {
         type Result;
-        unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [idx_cc VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc] EqIdx vfaezbs vfaezhs vfaezfs }
@@ -2166,7 +2164,7 @@ mod sealed {
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorFindAnyNeOrZeroIdxCC<Other> {
         type Result;
-        unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result;
+        unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other) -> (Self::Result, i32);
     }
 
     impl_vfae! { [idx_cc VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc] NeIdx vfaezbs vfaezhs vfaezfs }
@@ -2423,11 +2421,9 @@ mod sealed {
                 unsafe fn $intr(
                     a: $ty,
                     b: $ty,
-                    c: *mut i32,
-                ) -> $outty {
+                ) -> ($outty, i32) {
                     let PackedTuple { x, y } = super::$intr(a, b);
-                    c.write(y);
-                    x
+                    (x, y)
                 }
 
                 #[unstable(feature = "stdarch_s390x", issue = "135681")]
@@ -2436,8 +2432,8 @@ mod sealed {
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_packs_cc(self, b: Self, c: *mut i32) -> Self::Result {
-                        $intr(self, b, c)
+                    unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
                     }
                 }
             )*
@@ -2447,7 +2443,7 @@ mod sealed {
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     pub trait VectorPacksCC {
         type Result;
-        unsafe fn vec_packs_cc(self, b: Self, c: *mut i32) -> Self::Result;
+        unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32);
     }
 
     impl_vector_packs_cc! {
@@ -2468,8 +2464,8 @@ mod sealed {
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_packsu_cc(self, b: Self, c: *mut i32) -> Self::Result {
-                        $intr(self, b, c)
+                    unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
                     }
                 }
             )*
@@ -2479,7 +2475,7 @@ mod sealed {
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     pub trait VectorPacksuCC {
         type Result;
-        unsafe fn vec_packsu_cc(self, b: Self, c: *mut i32) -> Self::Result;
+        unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32);
     }
 
     impl_vector_packsu_cc! {
@@ -3187,15 +3183,13 @@ mod sealed {
             self,
             b: Self,
             c: vector_unsigned_char,
-            d: *mut i32,
-        ) -> vector_unsigned_char;
+        ) -> (vector_unsigned_char, i32);
 
         unsafe fn vec_search_string_until_zero_cc(
             self,
             b: Self,
             c: vector_unsigned_char,
-            d: *mut i32,
-        ) -> vector_unsigned_char;
+        ) -> (vector_unsigned_char, i32);
     }
 
     macro_rules! impl_vec_search_string{
@@ -3205,18 +3199,16 @@ mod sealed {
                 impl VectorSearchString for $ty {
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char, d: *mut i32) -> vector_unsigned_char {
+                    unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
                         let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c);
-                        d.write(y);
-                        x
+                        (x, y)
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char, d: *mut i32) -> vector_unsigned_char {
+                    unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
                         let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c);
-                        d.write(y);
-                        x
+                        (x, y)
                     }
                 }
 
@@ -3435,8 +3427,8 @@ mod sealed {
     impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_unsigned_int) }
 
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
-    pub trait VectorCopyUntilZeroCC {
-        unsafe fn vec_cp_until_zero_cc(self, cc: *mut i32) -> Self;
+    pub trait VectorCopyUntilZeroCC: Sized {
+        unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32);
     }
 
     test_impl! { vec_vistrbs (a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32> [vistrbs, vistrbs] }
@@ -3450,10 +3442,9 @@ mod sealed {
                 impl VectorCopyUntilZeroCC for $ty {
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_cp_until_zero_cc(self, cc: *mut i32) -> Self {
+                    unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32) {
                         let PackedTuple { x,y } = $intr(transmute(self));
-                        cc.write(y);
-                        transmute(x)
+                        (transmute(x), y)
                     }
                 }
 
@@ -3729,14 +3720,14 @@ mod sealed {
         unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result;
         unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result;
 
-        unsafe fn vec_cmpeq_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
-        unsafe fn vec_cmpne_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+        unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32);
+        unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32);
 
         unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result;
         unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result;
 
-        unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
-        unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result;
+        unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32);
+        unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32);
     }
 
     macro_rules! impl_compare_equality_idx {
@@ -3777,34 +3768,30 @@ mod sealed {
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_cmpeq_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                    unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $cmpeq_cc(transmute(self), transmute(other));
-                        *cc = y;
-                        transmute(x)
+                        (transmute(x), y)
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_cmpne_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                    unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $cmpne_cc(transmute(self), transmute(other));
-                        *cc = y;
-                        transmute(x)
+                        (transmute(x), y)
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                    unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $cmpeq_or_0_cc(transmute(self), transmute(other));
-                        *cc = y;
-                        transmute(x)
+                        (transmute(x), y)
                     }
 
                     #[inline]
                     #[target_feature(enable = "vector")]
-                    unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self, cc: *mut i32) -> Self::Result {
+                    unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
                         let PackedTuple { x, y } = $cmpne_or_0_cc(transmute(self), transmute(other));
-                        *cc = y;
-                        transmute(x)
+                        (transmute(x), y)
                     }
                 }
             )*
@@ -4279,7 +4266,10 @@ pub unsafe fn vec_packs<T: sealed::VectorPacks<U>, U>(a: T, b: U) -> T::Result {
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_packs_cc<T: sealed::VectorPacksCC>(a: T, b: T, c: *mut i32) -> T::Result {
-    a.vec_packs_cc(b, c)
+    let (x, y) = a.vec_packs_cc(b);
+    // SAFETY: the caller must ensure `c` is valid and aligned for writing an `i32`.
+    unsafe { c.write(y) };
+    x
 }
 
 /// Vector Pack Saturated Unsigned
@@ -4295,7 +4284,10 @@ pub unsafe fn vec_packsu<T: sealed::VectorPacksu<U>, U>(a: T, b: U) -> T::Result
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_packsu_cc<T: sealed::VectorPacksuCC>(a: T, b: T, c: *mut i32) -> T::Result {
-    a.vec_packsu_cc(b, c)
+    let (x, y) = a.vec_packsu_cc(b);
+    // SAFETY: the caller must ensure `c` is valid and aligned for writing an `i32`.
+    unsafe { c.write(y) };
+    x
 }
 
 /// Vector Unpack High
@@ -4668,7 +4659,10 @@ macro_rules! vec_find_any_cc {
             #[target_feature(enable = "vector")]
             #[unstable(feature = "stdarch_s390x", issue = "135681")]
             pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U, c: *mut i32) -> T::Result {
-                a.$fun(b, c)
+                let (x, y) = a.$fun(b);
+                // SAFETY: the caller must ensure `c` is valid and aligned for writing an `i32`.
+                unsafe { c.write(y) };
+                x
             }
         )*
     }
@@ -5021,7 +5014,10 @@ pub unsafe fn vec_search_string_cc<T: sealed::VectorSearchString>(
     c: vector_unsigned_char,
     d: *mut i32,
 ) -> vector_unsigned_char {
-    a.vec_search_string_cc(b, c, d)
+    let (x, y) = a.vec_search_string_cc(b, c);
+    // SAFETY: the caller must ensure `d` is valid and aligned for writing an `i32`.
+    unsafe { d.write(y) };
+    x
 }
 
 /// Vector Search String Until Zero
@@ -5034,7 +5029,10 @@ pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>(
     c: vector_unsigned_char,
     d: *mut i32,
 ) -> vector_unsigned_char {
-    a.vec_search_string_until_zero_cc(b, c, d)
+    let (x, y) = a.vec_search_string_until_zero_cc(b, c);
+    // SAFETY: the caller must ensure `d` is valid and aligned for writing an `i32`.
+    unsafe { d.write(y) };
+    x
 }
 
 /// Vector Convert from float (even elements) to double
@@ -5116,7 +5113,10 @@ pub unsafe fn vec_cp_until_zero<T: sealed::VectorCopyUntilZero>(a: T) -> T {
 #[target_feature(enable = "vector")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 pub unsafe fn vec_cp_until_zero_cc<T: sealed::VectorCopyUntilZeroCC>(a: T, cc: *mut i32) -> T {
-    a.vec_cp_until_zero_cc(cc)
+    let (x, y) = a.vec_cp_until_zero_cc();
+    // SAFETY: the caller must ensure `cc` is valid and aligned for writing an `i32`.
+    unsafe { cc.write(y) };
+    x
 }
 
 /// Vector Multiply Sum Logical
@@ -5384,7 +5383,10 @@ pub unsafe fn vec_cmpeq_idx_cc<T: sealed::VectorEqualityIdx>(
     b: T,
     cc: *mut i32,
 ) -> T::Result {
-    a.vec_cmpeq_idx_cc(b, cc)
+    let (x, y) = a.vec_cmpeq_idx_cc(b);
+    // SAFETY: the caller must ensure `cc` is valid and aligned for writing an `i32`.
+    unsafe { cc.write(y) };
+    x
 }
 /// Vector Compare Not Equal Index with Condition Code
 #[inline]
@@ -5395,7 +5396,10 @@ pub unsafe fn vec_cmpne_idx_cc<T: sealed::VectorEqualityIdx>(
     b: T,
     cc: *mut i32,
 ) -> T::Result {
-    a.vec_cmpne_idx_cc(b, cc)
+    let (x, y) = a.vec_cmpne_idx_cc(b);
+    // SAFETY: the caller must ensure `cc` is valid and aligned for writing an `i32`.
+    unsafe { cc.write(y) };
+    x
 }
 /// Vector Compare Equal or Zero Index
 #[inline]
@@ -5420,7 +5423,10 @@ pub unsafe fn vec_cmpeq_or_0_idx_cc<T: sealed::VectorEqualityIdx>(
     b: T,
     cc: *mut i32,
 ) -> T::Result {
-    a.vec_cmpeq_or_0_idx_cc(b, cc)
+    let (x, y) = a.vec_cmpeq_or_0_idx_cc(b);
+    // SAFETY: the caller must ensure `cc` is valid and aligned for writing an `i32`.
+    unsafe { cc.write(y) };
+    x
 }
 /// Vector Compare Not Equal or Zero Index with Condition Code
 #[inline]
@@ -5431,7 +5436,10 @@ pub unsafe fn vec_cmpne_or_0_idx_cc<T: sealed::VectorEqualityIdx>(
     b: T,
     cc: *mut i32,
 ) -> T::Result {
-    a.vec_cmpne_or_0_idx_cc(b, cc)
+    let (x, y) = a.vec_cmpne_or_0_idx_cc(b);
+    // SAFETY: the caller must ensure `cc` is valid and aligned for writing an `i32`.
+    unsafe { cc.write(y) };
+    x
 }
 
 /// All Elements Equal